diff --git a/go.mod b/go.mod index 1d55190a8a..fb6f1cbb3b 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/google/addlicense v1.2.0 github.com/google/gnostic-models v0.7.1 github.com/google/go-cmp v0.7.0 - github.com/google/go-containerregistry v0.21.6 + github.com/google/go-containerregistry v0.21.7 github.com/google/go-licenses/v2 v2.0.1 github.com/google/uuid v1.6.0 github.com/jstemmer/go-junit-report/v2 v2.1.0 @@ -84,7 +84,7 @@ require ( github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/chai2010/gettext-go v1.0.2 // indirect github.com/distribution/reference v0.6.0 // indirect - github.com/docker/cli v29.4.3+incompatible // indirect + github.com/docker/cli v29.5.3+incompatible // indirect github.com/docker/docker-credential-helpers v0.9.3 // indirect github.com/emicklei/go-restful/v3 v3.12.2 // indirect github.com/evanphx/json-patch/v5 v5.9.11 // indirect @@ -151,15 +151,15 @@ require ( go.uber.org/atomic v1.11.0 // indirect go.yaml.in/yaml/v2 v2.4.4 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/crypto v0.51.0 // indirect - golang.org/x/net v0.55.0 // indirect + golang.org/x/crypto v0.53.0 // indirect + golang.org/x/net v0.56.0 // indirect golang.org/x/oauth2 v0.36.0 // indirect - golang.org/x/sync v0.20.0 // indirect - golang.org/x/sys v0.45.0 // indirect - golang.org/x/term v0.43.0 // indirect - golang.org/x/text v0.37.0 // indirect + golang.org/x/sync v0.21.0 // indirect + golang.org/x/sys v0.46.0 // indirect + golang.org/x/term v0.44.0 // indirect + golang.org/x/text v0.38.0 // indirect golang.org/x/time v0.15.0 // indirect - golang.org/x/tools v0.45.0 // indirect + golang.org/x/tools v0.46.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/genproto v0.0.0-20260319201613-d00831a3d3e7 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20260526163538-3dc84a4a5aaa // indirect diff --git a/go.sum b/go.sum index 9b599312c3..258d9d80ed 100644 --- a/go.sum +++ b/go.sum @@ -70,8 +70,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk= github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E= -github.com/docker/cli v29.4.3+incompatible h1:u+UliYm2J/rYrIh2FqHQg32neRG8GjbvNuwQRTzGspU= -github.com/docker/cli v29.4.3+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= +github.com/docker/cli v29.5.3+incompatible h1:nbEFfz774vBwQ5KRYv7c/AghjReqnGISvrRhzjV0evs= +github.com/docker/cli v29.5.3+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/docker-credential-helpers v0.9.3 h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8= github.com/docker/docker-credential-helpers v0.9.3/go.mod h1:x+4Gbw9aGmChi3qTLZj8Dfn0TD20M/fuWy0E5+WDeCo= github.com/elliotchance/orderedmap/v2 v2.7.0 h1:WHuf0DRo63uLnldCPp9ojm3gskYwEdIIfAUVG5KhoOc= @@ -160,8 +160,8 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/go-containerregistry v0.21.6 h1:T+yqQIlJXKrM98Om4DlW3GoWQAmhZuLMwoDOvVrtiUM= -github.com/google/go-containerregistry v0.21.6/go.mod h1:U7MMSBIJynke2MVQrQk19NP9k/uQsGz/h0amIFSHMbo= +github.com/google/go-containerregistry v0.21.7 h1:/vPFuVXDjtFREsVArW+0h1CIl5urnOhzei4X2DMW9IU= +github.com/google/go-containerregistry v0.21.7/go.mod h1:kjSbt7/zMsKLWfnHrIvKvhXHUw91jbe9DNjPPJ32gXE= github.com/google/go-licenses/v2 v2.0.1 h1:ti+9bi5o7DKbeeg5eBb/uZTgsaPNoJaLCh93cRcXsW8= github.com/google/go-licenses/v2 v2.0.1/go.mod h1:efibo0EDNGkau6AIMOViGW+rTNPudhxX9rCxtfw5zKE= github.com/google/go-replayers/httpreplay v1.2.0 h1:VM1wEyyjaoU53BwrOnaf9VhAyQQEEioJvFYxYcLRKzk= @@ -360,8 +360,8 @@ go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.51.0 h1:IBPXwPfKxY7cWQZ38ZCIRPI50YLeevDLlLnyC5wRGTI= -golang.org/x/crypto v0.51.0/go.mod h1:8AdwkbraGNABw2kOX6YFPs3WM22XqI4EXEd8g+x7Oc8= +golang.org/x/crypto v0.53.0 h1:QZ4Muo8THX6CizN2vPPd5fBGHyogrdK9fG4wLPFUsto= +golang.org/x/crypto v0.53.0/go.mod h1:DNLU434OwVakk9PzuwV8w62mAJpRJL3vsgcfp4Qnsio= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 h1:1UoZQm6f0P/ZO0w1Ri+f+ifG/gXhegadRdwBIXEFWDo= golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67/go.mod h1:qj5a5QZpwLU2NLQudwIN5koi3beDhSAlJwa67PuM98c= @@ -376,8 +376,8 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.55.0 h1:bcvxaJn3e1U6InsFWt1JUq1aSjnRxLzT2rtD2KfkDF8= -golang.org/x/net v0.55.0/go.mod h1:L5U2KuzuOe1lY7Z+aWVIKK6qEeJXnXV9yzGA+WCHJww= +golang.org/x/net v0.56.0 h1:Rw8j/hFzGvJUZwNBXnAtf5sVDVt+65SK2C7IxCxZt5o= +golang.org/x/net v0.56.0/go.mod h1:D3Ku6r+V6JROoZK144D2XfMHFcMq/0zSfLelVTCFKec= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs= golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q= @@ -385,8 +385,8 @@ golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= -golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= +golang.org/x/sync v0.21.0 h1:HLII4xRRTtCRkxYp4HNFF0Js/Og6q2i++KXbg0gHCwM= +golang.org/x/sync v0.21.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -394,14 +394,14 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.45.0 h1:dO4czNzziLiiXplLQgBCEpCvXQ3dnkn0SdaZSYdQ+FY= -golang.org/x/sys v0.45.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= -golang.org/x/term v0.43.0 h1:S4RLU2sB31O/NCl+zFN9Aru9A/Cq2aqKpTZJ6B+DwT4= -golang.org/x/term v0.43.0/go.mod h1:lrhlHNdQJHO+1qVYiHfFKVuVioJIheAc3fBSMFYEIsk= +golang.org/x/sys v0.46.0 h1:noSf2Fq6F8DBgS+LysIkx7rIExoNHJsxOAtPp4rthXw= +golang.org/x/sys v0.46.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/term v0.44.0 h1:0rLvDRCtNj0gZkyIXhCyOb2OAzEhLVqc4B+hrsBhrmc= +golang.org/x/term v0.44.0/go.mod h1:7ze4MdzUzLXpSAoFP1H0bOI9aXDqveSvatT5vKcFh2Y= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.37.0 h1:Cqjiwd9eSg8e0QAkyCaQTNHFIIzWtidPahFWR83rTrc= -golang.org/x/text v0.37.0/go.mod h1:a5sjxXGs9hsn/AJVwuElvCAo9v8QYLzvavO5z2PiM38= +golang.org/x/text v0.38.0 h1:sXmwo9DwP3OK9EZ7PqAdaooSGozfl/3a6/xJcbzPRhE= +golang.org/x/text v0.38.0/go.mod h1:YXZt3QhHUKYT53r2lLKFIVi6Ao1jdzrTR/KQ09qyxF4= golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U= golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -409,8 +409,8 @@ golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.45.0 h1:18qN3FAooORvApf5XjCXgsuayZOEtXf6JK18I3+ONa8= -golang.org/x/tools v0.45.0/go.mod h1:LuUGqqaXcXMEFEruIVJVm5mgDD8vww/z/SR1gQ4uE/0= +golang.org/x/tools v0.46.0 h1:7jTurBkPZu4moS/Uy4OQT1M+QBlsj3wejyZwsT8Z7rk= +golang.org/x/tools v0.46.0/go.mod h1:FrD85F8l+NWL+9XWBSyVSHO6Ne4jutsfIFba7AWQ5Ys= golang.org/x/tools/go/expect v0.1.1-deprecated h1:jpBZDwmgPhXsKZC6WhL20P4b/wmnpsEAGHaNy0n/rJM= golang.org/x/tools/go/expect v0.1.1-deprecated/go.mod h1:eihoPOH+FgIqa3FpoTwguz/bVUSGBlGQU67vpBeOrBY= golang.org/x/tools/go/packages/packagestest v0.1.1-deprecated h1:1h2MnaIAIXISqTFKdENegdpAgUXz6NrPEsbIeWaBRvM= diff --git a/vendor/github.com/docker/cli/cli/config/configfile/file.go b/vendor/github.com/docker/cli/cli/config/configfile/file.go index 1a0e5b46ca..26e148f059 100644 --- a/vendor/github.com/docker/cli/cli/config/configfile/file.go +++ b/vendor/github.com/docker/cli/cli/config/configfile/file.go @@ -20,6 +20,34 @@ import ( "github.com/sirupsen/logrus" ) +// authConfigKey is the key used to store credentials for Docker Hub. It is +// a copy of [registry.IndexServer]. +// +// [registry.IndexServer]: https://pkg.go.dev/github.com/docker/docker@v28.5.1+incompatible/registry#IndexServer +const authConfigKey = "https://index.docker.io/v1/" + +// getAuthConfigKey returns the canonical key used to look up stored +// registry credentials for the given registry domain. +// +// For the official Docker Hub registry ("docker.io"), credentials are stored +// under the historical full index address ("https://index.docker.io/v1/"). +// +// For all other registries, the input is domainName to already be a normalized +// hostname (optionally including ":port") and is returned unchanged. +// +// This function performs key normalization only; it does not validate or parse +// the input. +// +// It is similar to [registry.GetAuthConfigKey] in the daemon. +// +// [registry.GetAuthConfigKey]: https://pkg.go.dev/github.com/docker/docker@v28.5.1+incompatible/registry#GetAuthConfigKey +func getAuthConfigKey(domainName string) string { + if domainName == "docker.io" || domainName == "index.docker.io" { + return authConfigKey + } + return domainName +} + // ConfigFile ~/.docker/config.json file info type ConfigFile struct { AuthConfigs map[string]types.AuthConfig `json:"auths"` @@ -96,12 +124,12 @@ func New(fn string) *ConfigFile { // LoadFromReader reads the configuration data given and sets up the auth config // information with given directory and populates the receiver object -func (configFile *ConfigFile) LoadFromReader(configData io.Reader) error { - if err := json.NewDecoder(configData).Decode(configFile); err != nil && !errors.Is(err, io.EOF) { +func (c *ConfigFile) LoadFromReader(configData io.Reader) error { + if err := json.NewDecoder(configData).Decode(c); err != nil && !errors.Is(err, io.EOF) { return err } var err error - for addr, ac := range configFile.AuthConfigs { + for addr, ac := range c.AuthConfigs { if ac.Auth != "" { ac.Username, ac.Password, err = decodeAuth(ac.Auth) if err != nil { @@ -110,33 +138,33 @@ func (configFile *ConfigFile) LoadFromReader(configData io.Reader) error { } ac.Auth = "" ac.ServerAddress = addr - configFile.AuthConfigs[addr] = ac + c.AuthConfigs[addr] = ac } return nil } // ContainsAuth returns whether there is authentication configured // in this file or not. -func (configFile *ConfigFile) ContainsAuth() bool { - return configFile.CredentialsStore != "" || - len(configFile.CredentialHelpers) > 0 || - len(configFile.AuthConfigs) > 0 +func (c *ConfigFile) ContainsAuth() bool { + return c.CredentialsStore != "" || + len(c.CredentialHelpers) > 0 || + len(c.AuthConfigs) > 0 } // GetAuthConfigs returns the mapping of repo to auth configuration -func (configFile *ConfigFile) GetAuthConfigs() map[string]types.AuthConfig { - if configFile.AuthConfigs == nil { - configFile.AuthConfigs = make(map[string]types.AuthConfig) +func (c *ConfigFile) GetAuthConfigs() map[string]types.AuthConfig { + if c.AuthConfigs == nil { + c.AuthConfigs = make(map[string]types.AuthConfig) } - return configFile.AuthConfigs + return c.AuthConfigs } // SaveToWriter encodes and writes out all the authorization information to // the given writer -func (configFile *ConfigFile) SaveToWriter(writer io.Writer) error { +func (c *ConfigFile) SaveToWriter(writer io.Writer) error { // Encode sensitive data into a new/temp struct - tmpAuthConfigs := make(map[string]types.AuthConfig, len(configFile.AuthConfigs)) - for k, authConfig := range configFile.AuthConfigs { + tmpAuthConfigs := make(map[string]types.AuthConfig, len(c.AuthConfigs)) + for k, authConfig := range c.AuthConfigs { authCopy := authConfig // encode and save the authstring, while blanking out the original fields authCopy.Auth = encodeAuth(&authCopy) @@ -146,18 +174,18 @@ func (configFile *ConfigFile) SaveToWriter(writer io.Writer) error { tmpAuthConfigs[k] = authCopy } - saveAuthConfigs := configFile.AuthConfigs - configFile.AuthConfigs = tmpAuthConfigs - defer func() { configFile.AuthConfigs = saveAuthConfigs }() + saveAuthConfigs := c.AuthConfigs + c.AuthConfigs = tmpAuthConfigs + defer func() { c.AuthConfigs = saveAuthConfigs }() // User-Agent header is automatically set, and should not be stored in the configuration - for v := range configFile.HTTPHeaders { + for v := range c.HTTPHeaders { if strings.EqualFold(v, "User-Agent") { - delete(configFile.HTTPHeaders, v) + delete(c.HTTPHeaders, v) } } - data, err := json.MarshalIndent(configFile, "", "\t") + data, err := json.MarshalIndent(c, "", "\t") if err != nil { return err } @@ -166,16 +194,16 @@ func (configFile *ConfigFile) SaveToWriter(writer io.Writer) error { } // Save encodes and writes out all the authorization information -func (configFile *ConfigFile) Save() (retErr error) { - if configFile.Filename == "" { +func (c *ConfigFile) Save() (retErr error) { + if c.Filename == "" { return errors.New("can't save config with empty filename") } - dir := filepath.Dir(configFile.Filename) + dir := filepath.Dir(c.Filename) if err := os.MkdirAll(dir, 0o700); err != nil { return err } - temp, err := os.CreateTemp(dir, filepath.Base(configFile.Filename)) + temp, err := os.CreateTemp(dir, filepath.Base(c.Filename)) if err != nil { return err } @@ -189,7 +217,7 @@ func (configFile *ConfigFile) Save() (retErr error) { } }() - err = configFile.SaveToWriter(temp) + err = c.SaveToWriter(temp) if err != nil { return err } @@ -199,7 +227,7 @@ func (configFile *ConfigFile) Save() (retErr error) { } // Handle situation where the configfile is a symlink, and allow for dangling symlinks - cfgFile := configFile.Filename + cfgFile := c.Filename if f, err := filepath.EvalSymlinks(cfgFile); err == nil { cfgFile = f } else if os.IsNotExist(err) { @@ -217,16 +245,16 @@ func (configFile *ConfigFile) Save() (retErr error) { // ParseProxyConfig computes proxy configuration by retrieving the config for the provided host and // then checking this against any environment variables provided to the container -func (configFile *ConfigFile) ParseProxyConfig(host string, runOpts map[string]*string) map[string]*string { +func (c *ConfigFile) ParseProxyConfig(host string, runOpts map[string]*string) map[string]*string { var cfgKey string - if _, ok := configFile.Proxies[host]; !ok { + if _, ok := c.Proxies[host]; !ok { cfgKey = "default" } else { cfgKey = host } - config := configFile.Proxies[cfgKey] + config := c.Proxies[cfgKey] permitted := map[string]*string{ "HTTP_PROXY": &config.HTTPProxy, "HTTPS_PROXY": &config.HTTPSProxy, @@ -290,11 +318,11 @@ func decodeAuth(authStr string) (string, string, error) { // GetCredentialsStore returns a new credentials store from the settings in the // configuration file -func (configFile *ConfigFile) GetCredentialsStore(registryHostname string) credentials.Store { - store := credentials.NewFileStore(configFile) +func (c *ConfigFile) GetCredentialsStore(registryHostname string) credentials.Store { + store := credentials.NewFileStore(c) - if helper := getConfiguredCredentialStore(configFile, registryHostname); helper != "" { - store = newNativeStore(configFile, helper) + if helper := getConfiguredCredentialStore(c, getAuthConfigKey(registryHostname)); helper != "" { + store = newNativeStore(c, helper) } envConfig := os.Getenv(DockerEnvConfigKey) @@ -357,8 +385,9 @@ var newNativeStore = func(configFile *ConfigFile, helperSuffix string) credentia } // GetAuthConfig for a repository from the credential store -func (configFile *ConfigFile) GetAuthConfig(registryHostname string) (types.AuthConfig, error) { - return configFile.GetCredentialsStore(registryHostname).Get(registryHostname) +func (c *ConfigFile) GetAuthConfig(registryHostname string) (types.AuthConfig, error) { + acKey := getAuthConfigKey(registryHostname) + return c.GetCredentialsStore(acKey).Get(acKey) } // getConfiguredCredentialStore returns the credential helper configured for the @@ -375,13 +404,13 @@ func getConfiguredCredentialStore(c *ConfigFile, registryHostname string) string // GetAllCredentials returns all of the credentials stored in all of the // configured credential stores. -func (configFile *ConfigFile) GetAllCredentials() (map[string]types.AuthConfig, error) { +func (c *ConfigFile) GetAllCredentials() (map[string]types.AuthConfig, error) { auths := make(map[string]types.AuthConfig) addAll := func(from map[string]types.AuthConfig) { maps.Copy(auths, from) } - defaultStore := configFile.GetCredentialsStore("") + defaultStore := c.GetCredentialsStore("") newAuths, err := defaultStore.GetAll() if err != nil { return nil, err @@ -389,8 +418,8 @@ func (configFile *ConfigFile) GetAllCredentials() (map[string]types.AuthConfig, addAll(newAuths) // Auth configs from a registry-specific helper should override those from the default store. - for registryHostname := range configFile.CredentialHelpers { - newAuth, err := configFile.GetAuthConfig(registryHostname) + for registryHostname := range c.CredentialHelpers { + newAuth, err := c.GetAuthConfig(registryHostname) if err != nil { // TODO(thaJeztah): use context-logger, so that this output can be suppressed (in tests). logrus.WithError(err).Warnf("Failed to get credentials for registry: %s", registryHostname) @@ -402,16 +431,16 @@ func (configFile *ConfigFile) GetAllCredentials() (map[string]types.AuthConfig, } // GetFilename returns the file name that this config file is based on. -func (configFile *ConfigFile) GetFilename() string { - return configFile.Filename +func (c *ConfigFile) GetFilename() string { + return c.Filename } // PluginConfig retrieves the requested option for the given plugin. -func (configFile *ConfigFile) PluginConfig(pluginname, option string) (string, bool) { - if configFile.Plugins == nil { +func (c *ConfigFile) PluginConfig(pluginname, option string) (string, bool) { + if c.Plugins == nil { return "", false } - pluginConfig, ok := configFile.Plugins[pluginname] + pluginConfig, ok := c.Plugins[pluginname] if !ok { return "", false } @@ -423,14 +452,14 @@ func (configFile *ConfigFile) PluginConfig(pluginname, option string) (string, b // plugin. Passing a value of "" will remove the option. If removing // the final config item for a given plugin then also cleans up the // overall plugin entry. -func (configFile *ConfigFile) SetPluginConfig(pluginname, option, value string) { - if configFile.Plugins == nil { - configFile.Plugins = make(map[string]map[string]string) +func (c *ConfigFile) SetPluginConfig(pluginname, option, value string) { + if c.Plugins == nil { + c.Plugins = make(map[string]map[string]string) } - pluginConfig, ok := configFile.Plugins[pluginname] + pluginConfig, ok := c.Plugins[pluginname] if !ok { pluginConfig = make(map[string]string) - configFile.Plugins[pluginname] = pluginConfig + c.Plugins[pluginname] = pluginConfig } if value != "" { pluginConfig[option] = value @@ -438,6 +467,6 @@ func (configFile *ConfigFile) SetPluginConfig(pluginname, option, value string) delete(pluginConfig, option) } if len(pluginConfig) == 0 { - delete(configFile.Plugins, pluginname) + delete(c.Plugins, pluginname) } } diff --git a/vendor/github.com/docker/cli/cli/config/credentials/default_store.go b/vendor/github.com/docker/cli/cli/config/credentials/default_store.go index a36afc41f4..35b9ae4f53 100644 --- a/vendor/github.com/docker/cli/cli/config/credentials/default_store.go +++ b/vendor/github.com/docker/cli/cli/config/credentials/default_store.go @@ -2,12 +2,19 @@ package credentials import "os/exec" -// DetectDefaultStore return the default credentials store for the platform if -// no user-defined store is passed, and the store executable is available. -func DetectDefaultStore(store string) string { - if store != "" { +// DetectDefaultStore returns the credentials store to use if no user-defined +// custom helper is passed. +// +// Some platforms define a preferred helper, in which case it attempts to look +// up the helper binary before falling back to the platform's default. +// +// If no user-defined helper is passed, and no helper is found, it returns an +// empty string, which means credentials are stored unencrypted in the CLI's +// config-file without the use of a credentials store. +func DetectDefaultStore(customStore string) string { + if customStore != "" { // use user-defined - return store + return customStore } platformDefault := defaultCredentialsStore() diff --git a/vendor/github.com/google/go-containerregistry/cmd/crane/cmd/export.go b/vendor/github.com/google/go-containerregistry/cmd/crane/cmd/export.go index 497eb8e097..e77e490fa5 100644 --- a/vendor/github.com/google/go-containerregistry/cmd/crane/cmd/export.go +++ b/vendor/github.com/google/go-containerregistry/cmd/crane/cmd/export.go @@ -46,12 +46,6 @@ func NewCmdExport(options *[]crane.Option) *cobra.Command { dst = args[1] } - f, err := openFile(dst) - if err != nil { - return fmt.Errorf("failed to open %s: %w", dst, err) - } - defer f.Close() - var img v1.Image if src == "-" { tmpfile, err := os.CreateTemp("", "crane") @@ -87,6 +81,12 @@ func NewCmdExport(options *[]crane.Option) *cobra.Command { } } + f, err := openFile(dst) + if err != nil { + return fmt.Errorf("failed to open %s: %w", dst, err) + } + defer f.Close() + return crane.Export(img, f) }, } diff --git a/vendor/github.com/google/go-containerregistry/internal/gzip/zip.go b/vendor/github.com/google/go-containerregistry/internal/gzip/zip.go index 018c0f8c0f..98100c0246 100644 --- a/vendor/github.com/google/go-containerregistry/internal/gzip/zip.go +++ b/vendor/github.com/google/go-containerregistry/internal/gzip/zip.go @@ -51,33 +51,31 @@ func ReadCloserLevel(r io.ReadCloser, level int) io.ReadCloser { // Returns err so we can pw.CloseWithError(err) go func() error { - // TODO(go1.14): Just defer {pw,gw,r}.Close like you'd expect. - // Context: https://golang.org/issue/24283 + // Always close the source reader when the goroutine exits, + // regardless of which error path is taken. This prevents + // resource leaks (e.g. pullLimiter token slots held by + // limitedReadCloser wrappers around r). + defer r.Close() + gw, err := gzip.NewWriterLevel(bw, level) if err != nil { return pw.CloseWithError(err) } + defer gw.Close() + defer pw.Close() if _, err := io.Copy(gw, r); err != nil { - defer r.Close() - defer gw.Close() return pw.CloseWithError(err) } - // Close gzip writer to Flush it and write gzip trailers. if err := gw.Close(); err != nil { return pw.CloseWithError(err) } - // Flush bufio writer to ensure we write out everything. if err := bw.Flush(); err != nil { return pw.CloseWithError(err) } - // We don't really care if these fail. - defer pw.Close() - defer r.Close() - return nil }() diff --git a/vendor/github.com/google/go-containerregistry/internal/limit/limit.go b/vendor/github.com/google/go-containerregistry/internal/limit/limit.go new file mode 100644 index 0000000000..29d2b08ace --- /dev/null +++ b/vendor/github.com/google/go-containerregistry/internal/limit/limit.go @@ -0,0 +1,36 @@ +// Copyright 2026 Google LLC All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package limit provides bounded reads from io.Readers. +package limit + +import ( + "fmt" + "io" +) + +// ReadAll reads from r until EOF and returns the data. If r produces more +// than max bytes, it returns an error instead of a silently truncated +// slice. Use this in preference to io.ReadAll(io.LimitReader(r, max)) +// when callers must distinguish a complete read from a truncated one. +func ReadAll(r io.Reader, max int64) ([]byte, error) { + b, err := io.ReadAll(io.LimitReader(r, max+1)) + if err != nil { + return nil, err + } + if int64(len(b)) > max { + return nil, fmt.Errorf("body exceeds %d byte limit", max) + } + return b, nil +} diff --git a/vendor/github.com/google/go-containerregistry/pkg/name/registry.go b/vendor/github.com/google/go-containerregistry/pkg/name/registry.go index 7531d2426b..478021486c 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/name/registry.go +++ b/vendor/github.com/google/go-containerregistry/pkg/name/registry.go @@ -28,10 +28,10 @@ import ( var reLocal = regexp.MustCompile(`.*\.localhost(?::\d{1,5})?$`) // Detect the loopback IP (127.0.0.1) -var reLoopback = regexp.MustCompile(regexp.QuoteMeta("127.0.0.1")) +var reLoopback = regexp.MustCompile(`^127\.0\.0\.1(?::\d{1,5})?$`) // Detect the loopback IPV6 (::1) -var reipv6Loopback = regexp.MustCompile(regexp.QuoteMeta("::1")) +var reipv6Loopback = regexp.MustCompile(`^(::1|\[::1\](?::\d{1,5})?)$`) // Registry stores a docker registry name in a structured form. type Registry struct { diff --git a/vendor/github.com/google/go-containerregistry/pkg/name/repository.go b/vendor/github.com/google/go-containerregistry/pkg/name/repository.go index 290797575e..efde6e869d 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/name/repository.go +++ b/vendor/github.com/google/go-containerregistry/pkg/name/repository.go @@ -85,11 +85,12 @@ func NewRepository(name string, opts ...Option) (Repository, error) { var registry string repo := name parts := strings.SplitN(name, regRepoDelimiter, 2) - if len(parts) == 2 && (strings.ContainsRune(parts[0], '.') || strings.ContainsRune(parts[0], ':')) { + maybeRegistry := parts[0] + if len(parts) == 2 && (maybeRegistry == "localhost" || strings.ContainsAny(maybeRegistry, ".:")) { // The first part of the repository is treated as the registry domain - // iff it contains a '.' or ':' character, otherwise it is all repository - // and the domain defaults to Docker Hub. - registry = parts[0] + // if it is localhost or contains a '.' or ':' character, otherwise it + // is all repository and the domain defaults to Docker Hub. + registry = maybeRegistry repo = parts[1] } diff --git a/vendor/github.com/google/go-containerregistry/pkg/registry/blobs.go b/vendor/github.com/google/go-containerregistry/pkg/registry/blobs.go index 8aaa86fb3a..5b666ddbf2 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/registry/blobs.go +++ b/vendor/github.com/google/go-containerregistry/pkg/registry/blobs.go @@ -59,7 +59,7 @@ type BlobHandler interface { // backend that can serve metadata about blobs. type BlobStatHandler interface { // Stat returns the size of the blob, or errNotFound if the blob wasn't - // found, or redirectError if the blob can be found elsewhere. + // found, or RedirectError if the blob can be found elsewhere. Stat(ctx context.Context, repo string, h v1.Hash) (int64, error) } @@ -82,10 +82,10 @@ type BlobDeleteHandler interface { Delete(ctx context.Context, repo string, h v1.Hash) error } -// redirectError represents a signal that the blob handler doesn't have the blob +// RedirectError represents a signal that the blob handler doesn't have the blob // contents, but that those contents are at another location which registry // clients should redirect to. -type redirectError struct { +type RedirectError struct { // Location is the location to find the contents. Location string @@ -101,10 +101,10 @@ func (r *bytesCloser) Close() error { return nil } -func (e redirectError) Error() string { return fmt.Sprintf("redirecting (%d): %s", e.Code, e.Location) } +func (e RedirectError) Error() string { return fmt.Sprintf("redirecting (%d): %s", e.Code, e.Location) } -// errNotFound represents an error locating the blob. -var errNotFound = errors.New("not found") +// ErrNotFound represents an error locating the blob. +var ErrNotFound = errors.New("not found") type memHandler struct { m map[string][]byte @@ -119,7 +119,7 @@ func (m *memHandler) Stat(_ context.Context, _ string, h v1.Hash) (int64, error) b, found := m.m[h.String()] if !found { - return 0, errNotFound + return 0, ErrNotFound } return int64(len(b)), nil } @@ -130,7 +130,7 @@ func (m *memHandler) Get(_ context.Context, _ string, h v1.Hash) (io.ReadCloser, b, found := m.m[h.String()] if !found { - return nil, errNotFound + return nil, ErrNotFound } return &bytesCloser{bytes.NewReader(b)}, nil } @@ -153,7 +153,7 @@ func (m *memHandler) Delete(_ context.Context, _ string, h v1.Hash) error { defer m.lock.Unlock() if _, found := m.m[h.String()]; !found { - return errNotFound + return ErrNotFound } delete(m.m, h.String()) @@ -206,10 +206,10 @@ func (b *blobs) handle(resp http.ResponseWriter, req *http.Request) *regError { var size int64 if bsh, ok := b.blobHandler.(BlobStatHandler); ok { size, err = bsh.Stat(req.Context(), repo, h) - if errors.Is(err, errNotFound) { + if errors.Is(err, ErrNotFound) { return regErrBlobUnknown } else if err != nil { - var rerr redirectError + var rerr RedirectError if errors.As(err, &rerr) { http.Redirect(resp, req, rerr.Location, rerr.Code) return nil @@ -218,10 +218,10 @@ func (b *blobs) handle(resp http.ResponseWriter, req *http.Request) *regError { } } else { rc, err := b.blobHandler.Get(req.Context(), repo, h) - if errors.Is(err, errNotFound) { + if errors.Is(err, ErrNotFound) { return regErrBlobUnknown } else if err != nil { - var rerr redirectError + var rerr RedirectError if errors.As(err, &rerr) { http.Redirect(resp, req, rerr.Location, rerr.Code) return nil @@ -254,10 +254,10 @@ func (b *blobs) handle(resp http.ResponseWriter, req *http.Request) *regError { var r io.Reader if bsh, ok := b.blobHandler.(BlobStatHandler); ok { size, err = bsh.Stat(req.Context(), repo, h) - if errors.Is(err, errNotFound) { + if errors.Is(err, ErrNotFound) { return regErrBlobUnknown } else if err != nil { - var rerr redirectError + var rerr RedirectError if errors.As(err, &rerr) { http.Redirect(resp, req, rerr.Location, rerr.Code) return nil @@ -266,10 +266,10 @@ func (b *blobs) handle(resp http.ResponseWriter, req *http.Request) *regError { } rc, err := b.blobHandler.Get(req.Context(), repo, h) - if errors.Is(err, errNotFound) { + if errors.Is(err, ErrNotFound) { return regErrBlobUnknown } else if err != nil { - var rerr redirectError + var rerr RedirectError if errors.As(err, &rerr) { http.Redirect(resp, req, rerr.Location, rerr.Code) return nil @@ -283,10 +283,10 @@ func (b *blobs) handle(resp http.ResponseWriter, req *http.Request) *regError { } else { tmp, err := b.blobHandler.Get(req.Context(), repo, h) - if errors.Is(err, errNotFound) { + if errors.Is(err, ErrNotFound) { return regErrBlobUnknown } else if err != nil { - var rerr redirectError + var rerr RedirectError if errors.As(err, &rerr) { http.Redirect(resp, req, rerr.Location, rerr.Code) return nil diff --git a/vendor/github.com/google/go-containerregistry/pkg/registry/blobs_disk.go b/vendor/github.com/google/go-containerregistry/pkg/registry/blobs_disk.go index 331aade7f1..fe1c7c7897 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/registry/blobs_disk.go +++ b/vendor/github.com/google/go-containerregistry/pkg/registry/blobs_disk.go @@ -38,7 +38,7 @@ func (m *diskHandler) blobHashPath(h v1.Hash) string { func (m *diskHandler) Stat(_ context.Context, _ string, h v1.Hash) (int64, error) { f, err := os.Open(m.blobHashPath(h)) if errors.Is(err, os.ErrNotExist) { - return 0, errNotFound + return 0, ErrNotFound } else if err != nil { return 0, err } @@ -49,13 +49,15 @@ func (m *diskHandler) Stat(_ context.Context, _ string, h v1.Hash) (int64, error return 0, err } if got != h { - return 0, fmt.Errorf("%w: blob %s has digest %s", errNotFound, h, got) + return 0, fmt.Errorf("%w: blob %s has digest %s", ErrNotFound, h, got) } return size, nil } + func (m *diskHandler) Get(_ context.Context, _ string, h v1.Hash) (io.ReadCloser, error) { return os.Open(m.blobHashPath(h)) } + func (m *diskHandler) Put(_ context.Context, _ string, h v1.Hash, rc io.ReadCloser) error { // Put the temp file in the same directory to avoid cross-device problems // during the os.Rename. The filenames cannot conflict. @@ -76,6 +78,7 @@ func (m *diskHandler) Put(_ context.Context, _ string, h v1.Hash, rc io.ReadClos } return os.Rename(f.Name(), m.blobHashPath(h)) } + func (m *diskHandler) Delete(_ context.Context, _ string, h v1.Hash) error { return os.Remove(m.blobHashPath(h)) } diff --git a/vendor/github.com/google/go-containerregistry/pkg/v1/layout/blob.go b/vendor/github.com/google/go-containerregistry/pkg/v1/layout/blob.go index 2e5f4358dd..48d5022392 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/v1/layout/blob.go +++ b/vendor/github.com/google/go-containerregistry/pkg/v1/layout/blob.go @@ -15,6 +15,7 @@ package layout import ( + "fmt" "io" "os" @@ -23,15 +24,56 @@ import ( // Blob returns a blob with the given hash from the Path. func (l Path) Blob(h v1.Hash) (io.ReadCloser, error) { - return os.Open(l.blobPath(h)) + return l.openBlob(h) } // Bytes is a convenience function to return a blob from the Path as // a byte slice. func (l Path) Bytes(h v1.Hash) ([]byte, error) { - return os.ReadFile(l.blobPath(h)) + f, err := l.openBlob(h) + if err != nil { + return nil, err + } + defer f.Close() + return io.ReadAll(f) } func (l Path) blobPath(h v1.Hash) string { return l.path("blobs", h.Algorithm, h.Hex) } + +func (l Path) openBlob(h v1.Hash) (*os.File, error) { + p := l.blobPath(h) + info, err := os.Lstat(p) + if err != nil { + return nil, err + } + if info.Mode()&os.ModeSymlink != 0 { + return nil, fmt.Errorf("layout blob %s is a symlink", h) + } + if !info.Mode().IsRegular() { + return nil, fmt.Errorf("layout blob %s is not a regular file", h) + } + f, err := os.Open(p) + if err != nil { + return nil, err + } + closeFile := true + defer func() { + if closeFile { + f.Close() + } + }() + stat, err := f.Stat() + if err != nil { + return nil, err + } + if !stat.Mode().IsRegular() { + return nil, fmt.Errorf("layout blob %s is not a regular file", h) + } + if !os.SameFile(info, stat) { + return nil, fmt.Errorf("layout blob %s changed while opening", h) + } + closeFile = false + return f, nil +} diff --git a/vendor/github.com/google/go-containerregistry/pkg/v1/mutate/mutate.go b/vendor/github.com/google/go-containerregistry/pkg/v1/mutate/mutate.go index a6186b3756..09458e96cb 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/v1/mutate/mutate.go +++ b/vendor/github.com/google/go-containerregistry/pkg/v1/mutate/mutate.go @@ -365,6 +365,15 @@ func extractLayer(tarWriter *tar.Writer, fileMap map[string]bool, layer v1.Layer } } } + + // Drain any bytes the tar.Reader did not consume (trailing data after the + // end-of-archive marker) so the underlying verifying reader reaches io.EOF + // and the layer's digest is verified. Without this, a layer whose contents + // do not match the manifest's layer digest is extracted without error. + // pkg/v1/validate/layer.go performs the same drain. + if _, err := io.Copy(io.Discard, layerReader); err != nil { + return fmt.Errorf("verifying layer: %w", err) + } return nil } @@ -501,6 +510,12 @@ func layerTime(layer v1.Layer, t time.Time) (v1.Layer, error) { } } + // Drain trailing bytes so the underlying verifying reader reaches io.EOF + // and the layer digest is verified (see extractLayer). + if _, err := io.Copy(io.Discard, layerReader); err != nil { + return nil, fmt.Errorf("verifying layer: %w", err) + } + if err := tarWriter.Close(); err != nil { return nil, err } diff --git a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/catalog.go b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/catalog.go index a0281b9fd2..7b9cc0e247 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/catalog.go +++ b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/catalog.go @@ -122,7 +122,7 @@ func (f *fetcher) catalogPage(ctx context.Context, reg name.Registry, next strin return nil, err } - uri, err := getNextPageURL(resp) + uri, err := getNextPageURLForRegistry(resp, reg) if err != nil { return nil, err } diff --git a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/fetcher.go b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/fetcher.go index 3bda1d09c4..4b238d1290 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/fetcher.go +++ b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/fetcher.go @@ -24,6 +24,7 @@ import ( "net/url" "strings" + "github.com/google/go-containerregistry/internal/limit" "github.com/google/go-containerregistry/internal/redact" "github.com/google/go-containerregistry/internal/verify" "github.com/google/go-containerregistry/pkg/authn" @@ -168,7 +169,7 @@ func (f *fetcher) fetchManifest(ctx context.Context, ref name.Reference, accepta return nil, nil, err } - manifest, err := io.ReadAll(io.LimitReader(resp.Body, manifestLimit)) + manifest, err := limit.ReadAll(resp.Body, manifestLimit) if err != nil { return nil, nil, err } diff --git a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/list.go b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/list.go index 910d2a94c7..9beff7dc24 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/list.go +++ b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/list.go @@ -17,6 +17,7 @@ package remote import ( "context" "encoding/json" + "errors" "fmt" "net/http" "net/url" @@ -85,7 +86,7 @@ func (f *fetcher) listPage(ctx context.Context, repo name.Repository, next strin return nil, err } - uri, err := getNextPageURL(resp) + uri, err := getNextPageURL(resp, repo) if err != nil { return nil, err } @@ -99,8 +100,9 @@ func (f *fetcher) listPage(ctx context.Context, repo name.Repository, next strin // getNextPageURL checks if there is a Link header in a http.Response which // contains a link to the next page. If yes it returns the url.URL of the next -// page otherwise it returns nil. -func getNextPageURL(resp *http.Response) (*url.URL, error) { +// page otherwise it returns nil. It validates that the resolved URL points +// to the same registry to prevent SSRF attacks. +func getNextPageURL(resp *http.Response, repo name.Repository) (*url.URL, error) { link := resp.Header.Get("Link") if link == "" { return nil, nil @@ -115,6 +117,9 @@ func getNextPageURL(resp *http.Response) (*url.URL, error) { return nil, fmt.Errorf("failed to parse link header: missing '>' in: %s", link) } link = link[1:end] + if link == "" { + return nil, nil + } linkURL, err := url.Parse(link) if err != nil { @@ -124,9 +129,31 @@ func getNextPageURL(resp *http.Response) (*url.URL, error) { return nil, nil } linkURL = resp.Request.URL.ResolveReference(linkURL) + + // Validate that the pagination URL points to the same registry to prevent SSRF. + if err := validatePaginationURL(linkURL, repo); err != nil { + return nil, err + } + return linkURL, nil } +// validatePaginationURL checks that a pagination URL is safe to follow. +func validatePaginationURL(u *url.URL, repo name.Repository) error { + return validatePaginationURLHost(u, repo.Scheme(), repo.RegistryStr()) +} + +// validatePaginationURLHost checks that a pagination URL is safe to follow. +func validatePaginationURLHost(u *url.URL, scheme, host string) error { + if u.Scheme != scheme { + return fmt.Errorf("pagination URL scheme %q does not match registry scheme %q", u.Scheme, scheme) + } + if u.Host != host { + return errors.New("pagination URL host does not match registry host: potential SSRF attack") + } + return nil +} + type Lister struct { f *fetcher repo name.Repository @@ -150,3 +177,40 @@ func (l *Lister) Next(ctx context.Context) (*Tags, error) { func (l *Lister) HasNext() bool { return l.page != nil && (!l.needMore || l.page.Next != "") } + +// getNextPageURLForRegistry is like getNextPageURL but for name.Registry. +func getNextPageURLForRegistry(resp *http.Response, reg name.Registry) (*url.URL, error) { + link := resp.Header.Get("Link") + if link == "" { + return nil, nil + } + + if link[0] != '<' { + return nil, fmt.Errorf("failed to parse link header: missing '<' in: %s", link) + } + + end := strings.Index(link, ">") + if end == -1 { + return nil, fmt.Errorf("failed to parse link header: missing '>' in: %s", link) + } + link = link[1:end] + if link == "" { + return nil, nil + } + + linkURL, err := url.Parse(link) + if err != nil { + return nil, err + } + if resp.Request == nil || resp.Request.URL == nil { + return nil, nil + } + linkURL = resp.Request.URL.ResolveReference(linkURL) + + // Validate that the pagination URL points to the same registry to prevent SSRF. + if err := validatePaginationURLHost(linkURL, reg.Scheme(), reg.RegistryStr()); err != nil { + return nil, err + } + + return linkURL, nil +} diff --git a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/referrers.go b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/referrers.go index 17e9c26ed0..c23e1d83b6 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/referrers.go +++ b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/referrers.go @@ -18,10 +18,10 @@ import ( "bytes" "context" "errors" - "io" "net/http" "strings" + "github.com/google/go-containerregistry/internal/limit" "github.com/google/go-containerregistry/pkg/name" v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/empty" @@ -67,7 +67,7 @@ func (f *fetcher) fetchReferrers(ctx context.Context, filter map[string]string, var b []byte if resp.StatusCode == http.StatusOK && resp.Header.Get("Content-Type") == string(types.OCIImageIndex) { - b, err = io.ReadAll(io.LimitReader(resp.Body, manifestLimit)) + b, err = limit.ReadAll(resp.Body, manifestLimit) if err != nil { return nil, err } diff --git a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/transport/bearer.go b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/transport/bearer.go index f845dd1fba..cbc2b1d4f9 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/transport/bearer.go +++ b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/transport/bearer.go @@ -19,13 +19,13 @@ import ( "encoding/json" "errors" "fmt" - "io" "net" "net/http" "net/url" "strings" "sync" + "github.com/google/go-containerregistry/internal/limit" "github.com/google/go-containerregistry/internal/redact" "github.com/google/go-containerregistry/pkg/authn" "github.com/google/go-containerregistry/pkg/logs" @@ -247,7 +247,23 @@ func (bt *bearerTransport) RoundTrip(in *http.Request) (*http.Response, error) { if err = bt.refresh(in.Context()); err != nil { return nil, err } - return sendRequest() + // Re-attach the freshly fetched token, but only when the request is + // still talking to the registry we authenticated against. matchesHost + // guards against forwarding the Authorization header across an + // http.Client-level redirect to a different host: a malicious or + // compromised registry can 302 the request to an attacker-controlled + // host, answer the follow-up with a Bearer challenge, and harvest the + // token if we re-attach it unconditionally. For a cross-host request + // fall back to sendRequest(), which omits the credential, rather than + // leaking it to a host we never logged in to. + if !matchesHost(bt.registry.RegistryStr(), in, bt.scheme) { + return sendRequest() + } + bt.mx.RLock() + tok := bt.bearer.RegistryToken + bt.mx.RUnlock() + in.Header.Set("Authorization", fmt.Sprintf("Bearer %s", tok)) + return bt.inner.RoundTrip(in) } return res, err @@ -420,7 +436,7 @@ func (bt *bearerTransport) refreshOauth(ctx context.Context) ([]byte, error) { return nil, err } - return io.ReadAll(io.LimitReader(resp.Body, maxTokenBodySize)) + return limit.ReadAll(resp.Body, maxTokenBodySize) } // https://docs.docker.com/registry/spec/auth/token/ @@ -465,5 +481,5 @@ func (bt *bearerTransport) refreshBasic(ctx context.Context) ([]byte, error) { return nil, err } - return io.ReadAll(io.LimitReader(resp.Body, maxTokenBodySize)) + return limit.ReadAll(resp.Body, maxTokenBodySize) } diff --git a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/transport/error.go b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/transport/error.go index 934484db8d..7b14b8c413 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/v1/remote/transport/error.go +++ b/vendor/github.com/google/go-containerregistry/pkg/v1/remote/transport/error.go @@ -22,6 +22,7 @@ import ( "net/http" "strings" + "github.com/google/go-containerregistry/internal/limit" "github.com/google/go-containerregistry/internal/redact" ) @@ -167,7 +168,7 @@ func CheckError(resp *http.Response, codes ...int) error { } } - b, err := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodySize)) + b, err := limit.ReadAll(resp.Body, maxErrorBodySize) if err != nil { return err } @@ -191,7 +192,7 @@ func makeError(resp *http.Response, body []byte) *Error { } func retryError(resp *http.Response) error { - b, err := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodySize)) + b, err := limit.ReadAll(resp.Body, maxErrorBodySize) if err != nil { return err } diff --git a/vendor/github.com/google/go-containerregistry/pkg/v1/tarball/image.go b/vendor/github.com/google/go-containerregistry/pkg/v1/tarball/image.go index 96c0cce49d..9b0350b121 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/v1/tarball/image.go +++ b/vendor/github.com/google/go-containerregistry/pkg/v1/tarball/image.go @@ -248,7 +248,7 @@ func followLinks(opener Opener, filePath string, visited map[string]bool) (io.Re if err != nil { return nil, err } - if hdr.Name == filePath { + if path.Clean(hdr.Name) == path.Clean(filePath) { if hdr.Typeflag == tar.TypeSymlink || hdr.Typeflag == tar.TypeLink { currentDir := filepath.Dir(filePath) return followLinks(opener, path.Join(currentDir, path.Clean(hdr.Linkname)), visited) @@ -374,6 +374,9 @@ func (c *compressedImage) Manifest() (*v1.Manifest, error) { if err != nil { return nil, err } + if i >= len(cfg.RootFS.DiffIDs) { + return nil, fmt.Errorf("tarball manifest references %d layer(s) but config has %d rootfs.diff_ids; the config may not describe a runnable image (for example, a buildkit cacheconfig)", len(c.imgDescriptor.Layers), len(cfg.RootFS.DiffIDs)) + } diffid := cfg.RootFS.DiffIDs[i] if d, ok := c.imgDescriptor.LayerSources[diffid]; ok { // If it's a foreign layer, just append the descriptor so we can avoid diff --git a/vendor/github.com/google/go-containerregistry/pkg/v1/tarball/write.go b/vendor/github.com/google/go-containerregistry/pkg/v1/tarball/write.go index e607df164a..062268a5b9 100644 --- a/vendor/github.com/google/go-containerregistry/pkg/v1/tarball/write.go +++ b/vendor/github.com/google/go-containerregistry/pkg/v1/tarball/write.go @@ -191,16 +191,7 @@ func writeImagesToTar(imageToTags map[v1.Image][]string, m []byte, size int64, w } seenLayerDigests[hex] = struct{}{} - r, err := l.Compressed() - if err != nil { - return sendProgressWriterReturn(pw, err) - } - blobSize, err := l.Size() - if err != nil { - return sendProgressWriterReturn(pw, err) - } - - if err := writeTarEntry(tf, layerFiles[i], r, blobSize); err != nil { + if err := writeLayer(tf, layerFiles[i], l); err != nil { return sendProgressWriterReturn(pw, err) } } @@ -380,6 +371,23 @@ func writeTarEntry(tf *tar.Writer, path string, r io.Reader, size int64) error { return err } +// writeLayer streams a layer's compressed blob into the tar writer and closes +// the reader before returning. The close releases any pull-limiter slot held +// by a remote-backed layer (remote.WithJobs); leaving it open would deadlock +// the write loop after defaultJobs layers. +func writeLayer(tf *tar.Writer, name string, l v1.Layer) error { + r, err := l.Compressed() + if err != nil { + return err + } + defer r.Close() + blobSize, err := l.Size() + if err != nil { + return err + } + return writeTarEntry(tf, name, r, blobSize) +} + // ComputeManifest get the manifest.json that will be written to the tarball // for multiple references func ComputeManifest(refToImage map[name.Reference]v1.Image) (Manifest, error) { diff --git a/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.go b/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.go index b850e772e1..bfe546b60a 100644 --- a/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.go +++ b/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.go @@ -20,7 +20,7 @@ func chacha20Poly1305Open(dst []byte, key []uint32, src, ad []byte) bool func chacha20Poly1305Seal(dst []byte, key []uint32, src, ad []byte) var ( - useAVX2 = cpu.X86.HasAVX2 && cpu.X86.HasBMI2 + useAVX2 = cpu.X86.HasSSSE3 && cpu.X86.HasAVX2 && cpu.X86.HasBMI2 ) // setupState writes a ChaCha20 input matrix to state. See @@ -47,7 +47,7 @@ func setupState(state *[16]uint32, key *[32]byte, nonce []byte) { } func (c *chacha20poly1305) seal(dst, nonce, plaintext, additionalData []byte) []byte { - if !cpu.X86.HasSSSE3 { + if !useAVX2 { return c.sealGeneric(dst, nonce, plaintext, additionalData) } @@ -66,7 +66,7 @@ func (c *chacha20poly1305) seal(dst, nonce, plaintext, additionalData []byte) [] } func (c *chacha20poly1305) open(dst, nonce, ciphertext, additionalData []byte) ([]byte, error) { - if !cpu.X86.HasSSSE3 { + if !useAVX2 { return c.openGeneric(dst, nonce, ciphertext, additionalData) } diff --git a/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s b/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s index fd5ee845f9..c703c13471 100644 --- a/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s +++ b/vendor/golang.org/x/crypto/chacha20poly1305/chacha20poly1305_amd64.s @@ -6,27 +6,6 @@ // func polyHashADInternal<>() TEXT polyHashADInternal<>(SB), NOSPLIT, $0 - // Hack: Must declare #define macros inside of a function due to Avo constraints - // ROL rotates the uint32s in register R left by N bits, using temporary T. - #define ROL(N, R, T) \ - MOVO R, T; \ - PSLLL $(N), T; \ - PSRLL $(32-(N)), R; \ - PXOR T, R - - // ROL8 rotates the uint32s in register R left by 8, using temporary T if needed. - #ifdef GOAMD64_v2 - #define ROL8(R, T) PSHUFB ·rol8<>(SB), R - #else - #define ROL8(R, T) ROL(8, R, T) - #endif - - // ROL16 rotates the uint32s in register R left by 16, using temporary T if needed. - #ifdef GOAMD64_v2 - #define ROL16(R, T) PSHUFB ·rol16<>(SB), R - #else - #define ROL16(R, T) ROL(16, R, T) - #endif XORQ R10, R10 XORQ R11, R11 XORQ R12, R12 @@ -192,676 +171,112 @@ hashADDone: // Requires: AVX, AVX2, BMI2, CMOV, SSE2 TEXT ·chacha20Poly1305Open(SB), $288-97 // For aligned stack access - MOVQ SP, BP - ADDQ $0x20, BP - ANDQ $-32, BP - MOVQ dst_base+0(FP), DI - MOVQ key_base+24(FP), R8 - MOVQ src_base+48(FP), SI - MOVQ src_len+56(FP), BX - MOVQ ad_base+72(FP), CX - - // Check for AVX2 support - CMPB ·useAVX2+0(SB), $0x01 - JE chacha20Poly1305Open_AVX2 + MOVQ SP, BP + ADDQ $0x20, BP + ANDQ $-32, BP + MOVQ dst_base+0(FP), DI + MOVQ key_base+24(FP), R8 + MOVQ src_base+48(FP), SI + MOVQ src_len+56(FP), BX + MOVQ ad_base+72(FP), CX + VZEROUPPER + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + VBROADCASTI128 16(R8), Y14 + VBROADCASTI128 32(R8), Y12 + VBROADCASTI128 48(R8), Y4 + VPADDD ·avx2InitMask<>+0(SB), Y4, Y4 // Special optimization, for very short buffers - CMPQ BX, $0x80 - JBE openSSE128 - - // For long buffers, prepare the poly key first - MOVOU ·chacha20Constants<>+0(SB), X0 - MOVOU 16(R8), X3 - MOVOU 32(R8), X6 - MOVOU 48(R8), X9 - MOVO X9, X13 - - // Store state on stack for future use - MOVO X3, 32(BP) - MOVO X6, 48(BP) - MOVO X9, 128(BP) - MOVQ $0x0000000a, R9 - -openSSEPreparePolyKey: - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - DECQ R9 - JNE openSSEPreparePolyKey + CMPQ BX, $0xc0 + JBE openAVX2192 + CMPQ BX, $0x00000140 + JBE openAVX2320 + + // For the general key prepare the key first - as a byproduct we have 64 bytes of cipher stream + VMOVDQA Y14, 32(BP) + VMOVDQA Y12, 64(BP) + VMOVDQA Y4, 192(BP) + MOVQ $0x0000000a, R9 + +openAVX2PreparePolyKey: + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x04, Y4, Y4, Y4 + DECQ R9 + JNE openAVX2PreparePolyKey + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD 32(BP), Y14, Y14 + VPADDD 64(BP), Y12, Y12 + VPADDD 192(BP), Y4, Y4 + VPERM2I128 $0x02, Y0, Y14, Y3 - // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded - PADDL ·chacha20Constants<>+0(SB), X0 - PADDL 32(BP), X3 + // Clamp and store poly key + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) - // Clamp and store the key - PAND ·polyClampMask<>+0(SB), X0 - MOVO X0, (BP) - MOVO X3, 16(BP) + // Stream for the first 64 bytes + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 - // Hash AAD + // Hash AD + first 64 bytes MOVQ ad_len+80(FP), R9 CALL polyHashADInternal<>(SB) + XORQ CX, CX -openSSEMainLoop: - CMPQ BX, $0x00000100 - JB openSSEMainLoopDone - - // Load state, increment counter blocks - MOVO ·chacha20Constants<>+0(SB), X0 - MOVO 32(BP), X3 - MOVO 48(BP), X6 - MOVO 128(BP), X9 - PADDL ·sseIncMask<>+0(SB), X9 - MOVO X0, X1 - MOVO X3, X4 - MOVO X6, X7 - MOVO X9, X10 - PADDL ·sseIncMask<>+0(SB), X10 - MOVO X1, X2 - MOVO X4, X5 - MOVO X7, X8 - MOVO X10, X11 - PADDL ·sseIncMask<>+0(SB), X11 - MOVO X2, X12 - MOVO X5, X13 - MOVO X8, X14 - MOVO X11, X15 - PADDL ·sseIncMask<>+0(SB), X15 - - // Store counters - MOVO X9, 80(BP) - MOVO X10, 96(BP) - MOVO X11, 112(BP) - MOVO X15, 128(BP) - - // There are 10 ChaCha20 iterations of 2QR each, so for 6 iterations we hash - // 2 blocks, and for the remaining 4 only 1 block - for a total of 16 - MOVQ $0x00000004, CX - MOVQ SI, R9 - -openSSEInternalLoop: - MOVO X14, 64(BP) - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X3 - PXOR X14, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X3 - PXOR X14, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X4 - PXOR X14, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X4 - PXOR X14, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X5 - PXOR X14, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X5 - PXOR X14, X5 - MOVO 64(BP), X14 - MOVO X7, 64(BP) - PADDD X13, X12 - PXOR X12, X15 - ROL16(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x0c, X7 - PSRLL $0x14, X13 - PXOR X7, X13 - PADDD X13, X12 - PXOR X12, X15 - ROL8(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x07, X7 - PSRLL $0x19, X13 - PXOR X7, X13 - MOVO 64(BP), X7 - ADDQ (R9), R10 - ADCQ 8(R9), R11 - ADCQ $0x01, R12 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x0c - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - LEAQ 16(R9), R9 - MOVO X14, 64(BP) - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X3 - PXOR X14, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X3 - PXOR X14, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X4 - PXOR X14, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X4 - PXOR X14, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X5 - PXOR X14, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X5 - PXOR X14, X5 - MOVO 64(BP), X14 - MOVO X7, 64(BP) - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - PADDD X13, X12 - PXOR X12, X15 - ROL16(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x0c, X7 - PSRLL $0x14, X13 - PXOR X7, X13 - PADDD X13, X12 - PXOR X12, X15 - ROL8(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x07, X7 - PSRLL $0x19, X13 - PXOR X7, X13 - MOVO 64(BP), X7 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x04 - DECQ CX - JGE openSSEInternalLoop - ADDQ (R9), R10 - ADCQ 8(R9), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - LEAQ 16(R9), R9 - CMPQ CX, $-6 - JG openSSEInternalLoop - - // Add in the state - PADDD ·chacha20Constants<>+0(SB), X0 - PADDD ·chacha20Constants<>+0(SB), X1 - PADDD ·chacha20Constants<>+0(SB), X2 - PADDD ·chacha20Constants<>+0(SB), X12 - PADDD 32(BP), X3 - PADDD 32(BP), X4 - PADDD 32(BP), X5 - PADDD 32(BP), X13 - PADDD 48(BP), X6 - PADDD 48(BP), X7 - PADDD 48(BP), X8 - PADDD 48(BP), X14 - PADDD 80(BP), X9 - PADDD 96(BP), X10 - PADDD 112(BP), X11 - PADDD 128(BP), X15 - - // Load - xor - store - MOVO X15, 64(BP) - MOVOU (SI), X15 - PXOR X15, X0 - MOVOU X0, (DI) - MOVOU 16(SI), X15 - PXOR X15, X3 - MOVOU X3, 16(DI) - MOVOU 32(SI), X15 - PXOR X15, X6 - MOVOU X6, 32(DI) - MOVOU 48(SI), X15 - PXOR X15, X9 - MOVOU X9, 48(DI) - MOVOU 64(SI), X9 - PXOR X9, X1 - MOVOU X1, 64(DI) - MOVOU 80(SI), X9 - PXOR X9, X4 - MOVOU X4, 80(DI) - MOVOU 96(SI), X9 - PXOR X9, X7 - MOVOU X7, 96(DI) - MOVOU 112(SI), X9 - PXOR X9, X10 - MOVOU X10, 112(DI) - MOVOU 128(SI), X9 - PXOR X9, X2 - MOVOU X2, 128(DI) - MOVOU 144(SI), X9 - PXOR X9, X5 - MOVOU X5, 144(DI) - MOVOU 160(SI), X9 - PXOR X9, X8 - MOVOU X8, 160(DI) - MOVOU 176(SI), X9 - PXOR X9, X11 - MOVOU X11, 176(DI) - MOVOU 192(SI), X9 - PXOR X9, X12 - MOVOU X12, 192(DI) - MOVOU 208(SI), X9 - PXOR X9, X13 - MOVOU X13, 208(DI) - MOVOU 224(SI), X9 - PXOR X9, X14 - MOVOU X14, 224(DI) - MOVOU 240(SI), X9 - PXOR 64(BP), X9 - MOVOU X9, 240(DI) - LEAQ 256(SI), SI - LEAQ 256(DI), DI - SUBQ $0x00000100, BX - JMP openSSEMainLoop - -openSSEMainLoopDone: - // Handle the various tail sizes efficiently - TESTQ BX, BX - JE openSSEFinalize - CMPQ BX, $0x40 - JBE openSSETail64 - CMPQ BX, $0x80 - JBE openSSETail128 - CMPQ BX, $0xc0 - JBE openSSETail192 - JMP openSSETail256 - -openSSEFinalize: - // Hash in the PT, AAD lengths - ADDQ ad_len+80(FP), R10 - ADCQ src_len+56(FP), R11 +openAVX2InitialHash64: + ADDQ (SI)(CX*1), R10 + ADCQ 8(SI)(CX*1), R11 ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 IMULQ R12, R15 + MULXQ R11, AX, DX ADDQ AX, R14 ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 ADCQ DX, R8 MOVQ R13, R10 MOVQ R14, R11 @@ -878,3187 +293,494 @@ openSSEFinalize: ADDQ R15, R10 ADCQ R8, R11 ADCQ $0x00, R12 + ADDQ $0x10, CX + CMPQ CX, $0x40 + JNE openAVX2InitialHash64 - // Final reduce - MOVQ R10, R13 - MOVQ R11, R14 - MOVQ R12, R15 - SUBQ $-5, R10 - SBBQ $-1, R11 - SBBQ $0x03, R12 - CMOVQCS R13, R10 - CMOVQCS R14, R11 - CMOVQCS R15, R12 - - // Add in the "s" part of the key - ADDQ 16(BP), R10 - ADCQ 24(BP), R11 - - // Finally, constant time compare to the tag at the end of the message - XORQ AX, AX - MOVQ $0x00000001, DX - XORQ (SI), R10 - XORQ 8(SI), R11 - ORQ R11, R10 - CMOVQEQ DX, AX + // Decrypt the first 64 bytes + VPXOR (SI), Y0, Y0 + VPXOR 32(SI), Y14, Y14 + VMOVDQU Y0, (DI) + VMOVDQU Y14, 32(DI) + LEAQ 64(SI), SI + LEAQ 64(DI), DI + SUBQ $0x40, BX - // Return true iff tags are equal - MOVB AX, ret+96(FP) - RET +openAVX2MainLoop: + CMPQ BX, $0x00000200 + JB openAVX2MainLoopDone -openSSE128: - MOVOU ·chacha20Constants<>+0(SB), X0 - MOVOU 16(R8), X3 - MOVOU 32(R8), X6 - MOVOU 48(R8), X9 - MOVO X0, X1 - MOVO X3, X4 - MOVO X6, X7 - MOVO X9, X10 - PADDL ·sseIncMask<>+0(SB), X10 - MOVO X1, X2 - MOVO X4, X5 - MOVO X7, X8 - MOVO X10, X11 - PADDL ·sseIncMask<>+0(SB), X11 - MOVO X3, X13 - MOVO X6, X14 - MOVO X10, X15 - MOVQ $0x0000000a, R9 - -openSSE128InnerCipherLoop: - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X5 - PXOR X12, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X5 - PXOR X12, X5 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X5 - PXOR X12, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X5 - PXOR X12, X5 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - DECQ R9 - JNE openSSE128InnerCipherLoop - - // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded - PADDL ·chacha20Constants<>+0(SB), X0 - PADDL ·chacha20Constants<>+0(SB), X1 - PADDL ·chacha20Constants<>+0(SB), X2 - PADDL X13, X3 - PADDL X13, X4 - PADDL X13, X5 - PADDL X14, X7 - PADDL X14, X8 - PADDL X15, X10 - PADDL ·sseIncMask<>+0(SB), X15 - PADDL X15, X11 - - // Clamp and store the key - PAND ·polyClampMask<>+0(SB), X0 - MOVOU X0, (BP) - MOVOU X3, 16(BP) + // Load state, increment counter blocks, store the incremented counters + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + XORQ CX, CX - // Hash - MOVQ ad_len+80(FP), R9 - CALL polyHashADInternal<>(SB) - -openSSE128Open: - CMPQ BX, $0x10 - JB openSSETail16 - SUBQ $0x10, BX - - // Load for hashing - ADDQ (SI), R10 - ADCQ 8(SI), R11 - ADCQ $0x01, R12 - - // Load for decryption - MOVOU (SI), X12 - PXOR X12, X1 - MOVOU X1, (DI) - LEAQ 16(SI), SI - LEAQ 16(DI), DI - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - - // Shift the stream "left" - MOVO X4, X1 - MOVO X7, X4 - MOVO X10, X7 - MOVO X2, X10 - MOVO X5, X2 - MOVO X8, X5 - MOVO X11, X8 - JMP openSSE128Open - -openSSETail16: - TESTQ BX, BX - JE openSSEFinalize - - // We can safely load the CT from the end, because it is padded with the MAC - MOVQ BX, R9 - SHLQ $0x04, R9 - LEAQ ·andMask<>+0(SB), R13 - MOVOU (SI), X12 - ADDQ BX, SI - PAND -16(R13)(R9*1), X12 - MOVO X12, 64(BP) - MOVQ X12, R13 - MOVQ 72(BP), R14 - PXOR X1, X12 - - // We can only store one byte at a time, since plaintext can be shorter than 16 bytes -openSSETail16Store: - MOVQ X12, R8 - MOVB R8, (DI) - PSRLDQ $0x01, X12 - INCQ DI - DECQ BX - JNE openSSETail16Store - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - JMP openSSEFinalize - -openSSETail64: - MOVO ·chacha20Constants<>+0(SB), X0 - MOVO 32(BP), X3 - MOVO 48(BP), X6 - MOVO 128(BP), X9 - PADDL ·sseIncMask<>+0(SB), X9 - MOVO X9, 80(BP) - XORQ R9, R9 - MOVQ BX, CX - CMPQ CX, $0x10 - JB openSSETail64LoopB - -openSSETail64LoopA: - ADDQ (SI)(R9*1), R10 - ADCQ 8(SI)(R9*1), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - SUBQ $0x10, CX - -openSSETail64LoopB: - ADDQ $0x10, R9 - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - CMPQ CX, $0x10 - JAE openSSETail64LoopA - CMPQ R9, $0xa0 - JNE openSSETail64LoopB - PADDL ·chacha20Constants<>+0(SB), X0 - PADDL 32(BP), X3 - PADDL 48(BP), X6 - PADDL 80(BP), X9 - -openSSETail64DecLoop: - CMPQ BX, $0x10 - JB openSSETail64DecLoopDone - SUBQ $0x10, BX - MOVOU (SI), X12 - PXOR X12, X0 - MOVOU X0, (DI) - LEAQ 16(SI), SI - LEAQ 16(DI), DI - MOVO X3, X0 - MOVO X6, X3 - MOVO X9, X6 - JMP openSSETail64DecLoop - -openSSETail64DecLoopDone: - MOVO X0, X1 - JMP openSSETail16 - -openSSETail128: - MOVO ·chacha20Constants<>+0(SB), X1 - MOVO 32(BP), X4 - MOVO 48(BP), X7 - MOVO 128(BP), X10 - PADDL ·sseIncMask<>+0(SB), X10 - MOVO X10, 80(BP) - MOVO X1, X0 - MOVO X4, X3 - MOVO X7, X6 - MOVO X10, X9 - PADDL ·sseIncMask<>+0(SB), X9 - MOVO X9, 96(BP) - XORQ R9, R9 - MOVQ BX, CX - ANDQ $-16, CX - -openSSETail128LoopA: - ADDQ (SI)(R9*1), R10 - ADCQ 8(SI)(R9*1), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - -openSSETail128LoopB: - ADDQ $0x10, R9 - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x0c - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x04 - CMPQ R9, CX - JB openSSETail128LoopA - CMPQ R9, $0xa0 - JNE openSSETail128LoopB - PADDL ·chacha20Constants<>+0(SB), X0 - PADDL ·chacha20Constants<>+0(SB), X1 - PADDL 32(BP), X3 - PADDL 32(BP), X4 - PADDL 48(BP), X6 - PADDL 48(BP), X7 - PADDL 96(BP), X9 - PADDL 80(BP), X10 - MOVOU (SI), X12 - MOVOU 16(SI), X13 - MOVOU 32(SI), X14 - MOVOU 48(SI), X15 - PXOR X12, X1 - PXOR X13, X4 - PXOR X14, X7 - PXOR X15, X10 - MOVOU X1, (DI) - MOVOU X4, 16(DI) - MOVOU X7, 32(DI) - MOVOU X10, 48(DI) - SUBQ $0x40, BX - LEAQ 64(SI), SI - LEAQ 64(DI), DI - JMP openSSETail64DecLoop - -openSSETail192: - MOVO ·chacha20Constants<>+0(SB), X2 - MOVO 32(BP), X5 - MOVO 48(BP), X8 - MOVO 128(BP), X11 - PADDL ·sseIncMask<>+0(SB), X11 - MOVO X11, 80(BP) - MOVO X2, X1 - MOVO X5, X4 - MOVO X8, X7 - MOVO X11, X10 - PADDL ·sseIncMask<>+0(SB), X10 - MOVO X10, 96(BP) - MOVO X1, X0 - MOVO X4, X3 - MOVO X7, X6 - MOVO X10, X9 - PADDL ·sseIncMask<>+0(SB), X9 - MOVO X9, 112(BP) - MOVQ BX, CX - MOVQ $0x000000a0, R9 - CMPQ CX, $0xa0 - CMOVQGT R9, CX - ANDQ $-16, CX - XORQ R9, R9 - -openSSLTail192LoopA: - ADDQ (SI)(R9*1), R10 - ADCQ 8(SI)(R9*1), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - -openSSLTail192LoopB: - ADDQ $0x10, R9 - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X5 - PXOR X12, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X5 - PXOR X12, X5 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X5 - PXOR X12, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X5 - PXOR X12, X5 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - CMPQ R9, CX - JB openSSLTail192LoopA - CMPQ R9, $0xa0 - JNE openSSLTail192LoopB - CMPQ BX, $0xb0 - JB openSSLTail192Store - ADDQ 160(SI), R10 - ADCQ 168(SI), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - CMPQ BX, $0xc0 - JB openSSLTail192Store - ADDQ 176(SI), R10 - ADCQ 184(SI), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - -openSSLTail192Store: - PADDL ·chacha20Constants<>+0(SB), X0 - PADDL ·chacha20Constants<>+0(SB), X1 - PADDL ·chacha20Constants<>+0(SB), X2 - PADDL 32(BP), X3 - PADDL 32(BP), X4 - PADDL 32(BP), X5 - PADDL 48(BP), X6 - PADDL 48(BP), X7 - PADDL 48(BP), X8 - PADDL 112(BP), X9 - PADDL 96(BP), X10 - PADDL 80(BP), X11 - MOVOU (SI), X12 - MOVOU 16(SI), X13 - MOVOU 32(SI), X14 - MOVOU 48(SI), X15 - PXOR X12, X2 - PXOR X13, X5 - PXOR X14, X8 - PXOR X15, X11 - MOVOU X2, (DI) - MOVOU X5, 16(DI) - MOVOU X8, 32(DI) - MOVOU X11, 48(DI) - MOVOU 64(SI), X12 - MOVOU 80(SI), X13 - MOVOU 96(SI), X14 - MOVOU 112(SI), X15 - PXOR X12, X1 - PXOR X13, X4 - PXOR X14, X7 - PXOR X15, X10 - MOVOU X1, 64(DI) - MOVOU X4, 80(DI) - MOVOU X7, 96(DI) - MOVOU X10, 112(DI) - SUBQ $0x80, BX - LEAQ 128(SI), SI - LEAQ 128(DI), DI - JMP openSSETail64DecLoop - -openSSETail256: - MOVO ·chacha20Constants<>+0(SB), X0 - MOVO 32(BP), X3 - MOVO 48(BP), X6 - MOVO 128(BP), X9 - PADDL ·sseIncMask<>+0(SB), X9 - MOVO X0, X1 - MOVO X3, X4 - MOVO X6, X7 - MOVO X9, X10 - PADDL ·sseIncMask<>+0(SB), X10 - MOVO X1, X2 - MOVO X4, X5 - MOVO X7, X8 - MOVO X10, X11 - PADDL ·sseIncMask<>+0(SB), X11 - MOVO X2, X12 - MOVO X5, X13 - MOVO X8, X14 - MOVO X11, X15 - PADDL ·sseIncMask<>+0(SB), X15 - - // Store counters - MOVO X9, 80(BP) - MOVO X10, 96(BP) - MOVO X11, 112(BP) - MOVO X15, 128(BP) - XORQ R9, R9 - -openSSETail256Loop: - ADDQ (SI)(R9*1), R10 - ADCQ 8(SI)(R9*1), R11 - ADCQ $0x01, R12 - MOVO X14, 64(BP) - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X3 - PXOR X14, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X3 - PXOR X14, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X4 - PXOR X14, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X4 - PXOR X14, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X5 - PXOR X14, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X5 - PXOR X14, X5 - MOVO 64(BP), X14 - MOVO X7, 64(BP) - PADDD X13, X12 - PXOR X12, X15 - ROL16(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x0c, X7 - PSRLL $0x14, X13 - PXOR X7, X13 - PADDD X13, X12 - PXOR X12, X15 - ROL8(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x07, X7 - PSRLL $0x19, X13 - PXOR X7, X13 - MOVO 64(BP), X7 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x0c - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - MOVO X14, 64(BP) - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X3 - PXOR X14, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X3 - PXOR X14, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X4 - PXOR X14, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X4 - PXOR X14, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X5 - PXOR X14, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X5 - PXOR X14, X5 - MOVO 64(BP), X14 - MOVO X7, 64(BP) - PADDD X13, X12 - PXOR X12, X15 - ROL16(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x0c, X7 - PSRLL $0x14, X13 - PXOR X7, X13 - PADDD X13, X12 - PXOR X12, X15 - ROL8(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x07, X7 - PSRLL $0x19, X13 - PXOR X7, X13 - MOVO 64(BP), X7 - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x04 - ADDQ $0x10, R9 - CMPQ R9, $0xa0 - JB openSSETail256Loop - MOVQ BX, CX - ANDQ $-16, CX - -openSSETail256HashLoop: - ADDQ (SI)(R9*1), R10 - ADCQ 8(SI)(R9*1), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - ADDQ $0x10, R9 - CMPQ R9, CX - JB openSSETail256HashLoop - - // Add in the state - PADDD ·chacha20Constants<>+0(SB), X0 - PADDD ·chacha20Constants<>+0(SB), X1 - PADDD ·chacha20Constants<>+0(SB), X2 - PADDD ·chacha20Constants<>+0(SB), X12 - PADDD 32(BP), X3 - PADDD 32(BP), X4 - PADDD 32(BP), X5 - PADDD 32(BP), X13 - PADDD 48(BP), X6 - PADDD 48(BP), X7 - PADDD 48(BP), X8 - PADDD 48(BP), X14 - PADDD 80(BP), X9 - PADDD 96(BP), X10 - PADDD 112(BP), X11 - PADDD 128(BP), X15 - MOVO X15, 64(BP) - - // Load - xor - store - MOVOU (SI), X15 - PXOR X15, X0 - MOVOU 16(SI), X15 - PXOR X15, X3 - MOVOU 32(SI), X15 - PXOR X15, X6 - MOVOU 48(SI), X15 - PXOR X15, X9 - MOVOU X0, (DI) - MOVOU X3, 16(DI) - MOVOU X6, 32(DI) - MOVOU X9, 48(DI) - MOVOU 64(SI), X0 - MOVOU 80(SI), X3 - MOVOU 96(SI), X6 - MOVOU 112(SI), X9 - PXOR X0, X1 - PXOR X3, X4 - PXOR X6, X7 - PXOR X9, X10 - MOVOU X1, 64(DI) - MOVOU X4, 80(DI) - MOVOU X7, 96(DI) - MOVOU X10, 112(DI) - MOVOU 128(SI), X0 - MOVOU 144(SI), X3 - MOVOU 160(SI), X6 - MOVOU 176(SI), X9 - PXOR X0, X2 - PXOR X3, X5 - PXOR X6, X8 - PXOR X9, X11 - MOVOU X2, 128(DI) - MOVOU X5, 144(DI) - MOVOU X8, 160(DI) - MOVOU X11, 176(DI) - LEAQ 192(SI), SI - LEAQ 192(DI), DI - SUBQ $0xc0, BX - MOVO X12, X0 - MOVO X13, X3 - MOVO X14, X6 - MOVO 64(BP), X9 - JMP openSSETail64DecLoop - -chacha20Poly1305Open_AVX2: - VZEROUPPER - VMOVDQU ·chacha20Constants<>+0(SB), Y0 - BYTE $0xc4 - BYTE $0x42 - BYTE $0x7d - BYTE $0x5a - BYTE $0x70 - BYTE $0x10 - BYTE $0xc4 - BYTE $0x42 - BYTE $0x7d - BYTE $0x5a - BYTE $0x60 - BYTE $0x20 - BYTE $0xc4 - BYTE $0xc2 - BYTE $0x7d - BYTE $0x5a - BYTE $0x60 - BYTE $0x30 - VPADDD ·avx2InitMask<>+0(SB), Y4, Y4 - - // Special optimization, for very short buffers - CMPQ BX, $0xc0 - JBE openAVX2192 - CMPQ BX, $0x00000140 - JBE openAVX2320 - - // For the general key prepare the key first - as a byproduct we have 64 bytes of cipher stream - VMOVDQA Y14, 32(BP) - VMOVDQA Y12, 64(BP) - VMOVDQA Y4, 192(BP) - MOVQ $0x0000000a, R9 - -openAVX2PreparePolyKey: - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x0c, Y14, Y3 - VPSRLD $0x14, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x07, Y14, Y3 - VPSRLD $0x19, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPALIGNR $0x04, Y14, Y14, Y14 - VPALIGNR $0x08, Y12, Y12, Y12 - VPALIGNR $0x0c, Y4, Y4, Y4 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x0c, Y14, Y3 - VPSRLD $0x14, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x07, Y14, Y3 - VPSRLD $0x19, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPALIGNR $0x0c, Y14, Y14, Y14 - VPALIGNR $0x08, Y12, Y12, Y12 - VPALIGNR $0x04, Y4, Y4, Y4 - DECQ R9 - JNE openAVX2PreparePolyKey - VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 - VPADDD 32(BP), Y14, Y14 - VPADDD 64(BP), Y12, Y12 - VPADDD 192(BP), Y4, Y4 - VPERM2I128 $0x02, Y0, Y14, Y3 - - // Clamp and store poly key - VPAND ·polyClampMask<>+0(SB), Y3, Y3 - VMOVDQA Y3, (BP) - - // Stream for the first 64 bytes - VPERM2I128 $0x13, Y0, Y14, Y0 - VPERM2I128 $0x13, Y12, Y4, Y14 - - // Hash AD + first 64 bytes - MOVQ ad_len+80(FP), R9 - CALL polyHashADInternal<>(SB) - XORQ CX, CX - -openAVX2InitialHash64: - ADDQ (SI)(CX*1), R10 - ADCQ 8(SI)(CX*1), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - ADDQ $0x10, CX - CMPQ CX, $0x40 - JNE openAVX2InitialHash64 - - // Decrypt the first 64 bytes - VPXOR (SI), Y0, Y0 - VPXOR 32(SI), Y14, Y14 - VMOVDQU Y0, (DI) - VMOVDQU Y14, 32(DI) - LEAQ 64(SI), SI - LEAQ 64(DI), DI - SUBQ $0x40, BX - -openAVX2MainLoop: - CMPQ BX, $0x00000200 - JB openAVX2MainLoopDone - - // Load state, increment counter blocks, store the incremented counters - VMOVDQU ·chacha20Constants<>+0(SB), Y0 - VMOVDQA Y0, Y5 - VMOVDQA Y0, Y6 - VMOVDQA Y0, Y7 - VMOVDQA 32(BP), Y14 - VMOVDQA Y14, Y9 - VMOVDQA Y14, Y10 - VMOVDQA Y14, Y11 - VMOVDQA 64(BP), Y12 - VMOVDQA Y12, Y13 - VMOVDQA Y12, Y8 - VMOVDQA Y12, Y15 - VMOVDQA 192(BP), Y4 - VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 - VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 - VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 - VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 - VMOVDQA Y4, 96(BP) - VMOVDQA Y1, 128(BP) - VMOVDQA Y2, 160(BP) - VMOVDQA Y3, 192(BP) - XORQ CX, CX - -openAVX2InternalLoop: - ADDQ (SI)(CX*1), R10 - ADCQ 8(SI)(CX*1), R11 - ADCQ $0x01, R12 - VPADDD Y14, Y0, Y0 - VPADDD Y9, Y5, Y5 - VPADDD Y10, Y6, Y6 - VPADDD Y11, Y7, Y7 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - VPXOR Y0, Y4, Y4 - VPXOR Y5, Y1, Y1 - VPXOR Y6, Y2, Y2 - VPXOR Y7, Y3, Y3 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y2, Y2 - VPSHUFB ·rol16<>+0(SB), Y3, Y3 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - VPADDD Y4, Y12, Y12 - VPADDD Y1, Y13, Y13 - VPADDD Y2, Y8, Y8 - VPADDD Y3, Y15, Y15 - VPXOR Y12, Y14, Y14 - VPXOR Y13, Y9, Y9 - VPXOR Y8, Y10, Y10 - VPXOR Y15, Y11, Y11 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - VMOVDQA Y15, 224(BP) - VPSLLD $0x0c, Y14, Y15 - VPSRLD $0x14, Y14, Y14 - VPXOR Y15, Y14, Y14 - VPSLLD $0x0c, Y9, Y15 - VPSRLD $0x14, Y9, Y9 - VPXOR Y15, Y9, Y9 - VPSLLD $0x0c, Y10, Y15 - VPSRLD $0x14, Y10, Y10 - VPXOR Y15, Y10, Y10 - VPSLLD $0x0c, Y11, Y15 - VPSRLD $0x14, Y11, Y11 - VPXOR Y15, Y11, Y11 - VMOVDQA 224(BP), Y15 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - VPADDD Y14, Y0, Y0 - VPADDD Y9, Y5, Y5 - VPADDD Y10, Y6, Y6 - VPADDD Y11, Y7, Y7 - VPXOR Y0, Y4, Y4 - VPXOR Y5, Y1, Y1 - VPXOR Y6, Y2, Y2 - VPXOR Y7, Y3, Y3 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 - VPSHUFB ·rol8<>+0(SB), Y2, Y2 - VPSHUFB ·rol8<>+0(SB), Y3, Y3 - ADDQ 16(SI)(CX*1), R10 - ADCQ 24(SI)(CX*1), R11 - ADCQ $0x01, R12 - VPADDD Y4, Y12, Y12 - VPADDD Y1, Y13, Y13 - VPADDD Y2, Y8, Y8 - VPADDD Y3, Y15, Y15 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - VPXOR Y12, Y14, Y14 - VPXOR Y13, Y9, Y9 - VPXOR Y8, Y10, Y10 - VPXOR Y15, Y11, Y11 - VMOVDQA Y15, 224(BP) - VPSLLD $0x07, Y14, Y15 - VPSRLD $0x19, Y14, Y14 - VPXOR Y15, Y14, Y14 - VPSLLD $0x07, Y9, Y15 - VPSRLD $0x19, Y9, Y9 - VPXOR Y15, Y9, Y9 - VPSLLD $0x07, Y10, Y15 - VPSRLD $0x19, Y10, Y10 - VPXOR Y15, Y10, Y10 - VPSLLD $0x07, Y11, Y15 - VPSRLD $0x19, Y11, Y11 - VPXOR Y15, Y11, Y11 - VMOVDQA 224(BP), Y15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - VPALIGNR $0x04, Y14, Y14, Y14 - VPALIGNR $0x04, Y9, Y9, Y9 - VPALIGNR $0x04, Y10, Y10, Y10 - VPALIGNR $0x04, Y11, Y11, Y11 - VPALIGNR $0x08, Y12, Y12, Y12 - VPALIGNR $0x08, Y13, Y13, Y13 - VPALIGNR $0x08, Y8, Y8, Y8 - VPALIGNR $0x08, Y15, Y15, Y15 - VPALIGNR $0x0c, Y4, Y4, Y4 - VPALIGNR $0x0c, Y1, Y1, Y1 - VPALIGNR $0x0c, Y2, Y2, Y2 - VPALIGNR $0x0c, Y3, Y3, Y3 - VPADDD Y14, Y0, Y0 - VPADDD Y9, Y5, Y5 - VPADDD Y10, Y6, Y6 - VPADDD Y11, Y7, Y7 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - VPXOR Y0, Y4, Y4 - VPXOR Y5, Y1, Y1 - VPXOR Y6, Y2, Y2 - VPXOR Y7, Y3, Y3 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y2, Y2 - VPSHUFB ·rol16<>+0(SB), Y3, Y3 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - VPADDD Y4, Y12, Y12 - VPADDD Y1, Y13, Y13 - VPADDD Y2, Y8, Y8 - VPADDD Y3, Y15, Y15 - VPXOR Y12, Y14, Y14 - VPXOR Y13, Y9, Y9 - VPXOR Y8, Y10, Y10 - VPXOR Y15, Y11, Y11 - ADDQ 32(SI)(CX*1), R10 - ADCQ 40(SI)(CX*1), R11 - ADCQ $0x01, R12 - LEAQ 48(CX), CX - VMOVDQA Y15, 224(BP) - VPSLLD $0x0c, Y14, Y15 - VPSRLD $0x14, Y14, Y14 - VPXOR Y15, Y14, Y14 - VPSLLD $0x0c, Y9, Y15 - VPSRLD $0x14, Y9, Y9 - VPXOR Y15, Y9, Y9 - VPSLLD $0x0c, Y10, Y15 - VPSRLD $0x14, Y10, Y10 - VPXOR Y15, Y10, Y10 - VPSLLD $0x0c, Y11, Y15 - VPSRLD $0x14, Y11, Y11 - VPXOR Y15, Y11, Y11 - VMOVDQA 224(BP), Y15 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - VPADDD Y14, Y0, Y0 - VPADDD Y9, Y5, Y5 - VPADDD Y10, Y6, Y6 - VPADDD Y11, Y7, Y7 - VPXOR Y0, Y4, Y4 - VPXOR Y5, Y1, Y1 - VPXOR Y6, Y2, Y2 - VPXOR Y7, Y3, Y3 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 - VPSHUFB ·rol8<>+0(SB), Y2, Y2 - VPSHUFB ·rol8<>+0(SB), Y3, Y3 - VPADDD Y4, Y12, Y12 - VPADDD Y1, Y13, Y13 - VPADDD Y2, Y8, Y8 - VPADDD Y3, Y15, Y15 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - VPXOR Y12, Y14, Y14 - VPXOR Y13, Y9, Y9 - VPXOR Y8, Y10, Y10 - VPXOR Y15, Y11, Y11 - VMOVDQA Y15, 224(BP) - VPSLLD $0x07, Y14, Y15 - VPSRLD $0x19, Y14, Y14 - VPXOR Y15, Y14, Y14 - VPSLLD $0x07, Y9, Y15 - VPSRLD $0x19, Y9, Y9 - VPXOR Y15, Y9, Y9 - VPSLLD $0x07, Y10, Y15 - VPSRLD $0x19, Y10, Y10 - VPXOR Y15, Y10, Y10 - VPSLLD $0x07, Y11, Y15 - VPSRLD $0x19, Y11, Y11 - VPXOR Y15, Y11, Y11 - VMOVDQA 224(BP), Y15 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - VPALIGNR $0x0c, Y14, Y14, Y14 - VPALIGNR $0x0c, Y9, Y9, Y9 - VPALIGNR $0x0c, Y10, Y10, Y10 - VPALIGNR $0x0c, Y11, Y11, Y11 - VPALIGNR $0x08, Y12, Y12, Y12 - VPALIGNR $0x08, Y13, Y13, Y13 - VPALIGNR $0x08, Y8, Y8, Y8 - VPALIGNR $0x08, Y15, Y15, Y15 - VPALIGNR $0x04, Y4, Y4, Y4 - VPALIGNR $0x04, Y1, Y1, Y1 - VPALIGNR $0x04, Y2, Y2, Y2 - VPALIGNR $0x04, Y3, Y3, Y3 - CMPQ CX, $0x000001e0 - JNE openAVX2InternalLoop - VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 - VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 - VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 - VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 - VPADDD 32(BP), Y14, Y14 - VPADDD 32(BP), Y9, Y9 - VPADDD 32(BP), Y10, Y10 - VPADDD 32(BP), Y11, Y11 - VPADDD 64(BP), Y12, Y12 - VPADDD 64(BP), Y13, Y13 - VPADDD 64(BP), Y8, Y8 - VPADDD 64(BP), Y15, Y15 - VPADDD 96(BP), Y4, Y4 - VPADDD 128(BP), Y1, Y1 - VPADDD 160(BP), Y2, Y2 - VPADDD 192(BP), Y3, Y3 - VMOVDQA Y15, 224(BP) - - // We only hashed 480 of the 512 bytes available - hash the remaining 32 here - ADDQ 480(SI), R10 - ADCQ 488(SI), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - VPERM2I128 $0x02, Y0, Y14, Y15 - VPERM2I128 $0x13, Y0, Y14, Y14 - VPERM2I128 $0x02, Y12, Y4, Y0 - VPERM2I128 $0x13, Y12, Y4, Y12 - VPXOR (SI), Y15, Y15 - VPXOR 32(SI), Y0, Y0 - VPXOR 64(SI), Y14, Y14 - VPXOR 96(SI), Y12, Y12 - VMOVDQU Y15, (DI) - VMOVDQU Y0, 32(DI) - VMOVDQU Y14, 64(DI) - VMOVDQU Y12, 96(DI) - VPERM2I128 $0x02, Y5, Y9, Y0 - VPERM2I128 $0x02, Y13, Y1, Y14 - VPERM2I128 $0x13, Y5, Y9, Y12 - VPERM2I128 $0x13, Y13, Y1, Y4 - VPXOR 128(SI), Y0, Y0 - VPXOR 160(SI), Y14, Y14 - VPXOR 192(SI), Y12, Y12 - VPXOR 224(SI), Y4, Y4 - VMOVDQU Y0, 128(DI) - VMOVDQU Y14, 160(DI) - VMOVDQU Y12, 192(DI) - VMOVDQU Y4, 224(DI) - - // and here - ADDQ 496(SI), R10 - ADCQ 504(SI), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - VPERM2I128 $0x02, Y6, Y10, Y0 - VPERM2I128 $0x02, Y8, Y2, Y14 - VPERM2I128 $0x13, Y6, Y10, Y12 - VPERM2I128 $0x13, Y8, Y2, Y4 - VPXOR 256(SI), Y0, Y0 - VPXOR 288(SI), Y14, Y14 - VPXOR 320(SI), Y12, Y12 - VPXOR 352(SI), Y4, Y4 - VMOVDQU Y0, 256(DI) - VMOVDQU Y14, 288(DI) - VMOVDQU Y12, 320(DI) - VMOVDQU Y4, 352(DI) - VPERM2I128 $0x02, Y7, Y11, Y0 - VPERM2I128 $0x02, 224(BP), Y3, Y14 - VPERM2I128 $0x13, Y7, Y11, Y12 - VPERM2I128 $0x13, 224(BP), Y3, Y4 - VPXOR 384(SI), Y0, Y0 - VPXOR 416(SI), Y14, Y14 - VPXOR 448(SI), Y12, Y12 - VPXOR 480(SI), Y4, Y4 - VMOVDQU Y0, 384(DI) - VMOVDQU Y14, 416(DI) - VMOVDQU Y12, 448(DI) - VMOVDQU Y4, 480(DI) - LEAQ 512(SI), SI - LEAQ 512(DI), DI - SUBQ $0x00000200, BX - JMP openAVX2MainLoop - -openAVX2MainLoopDone: - // Handle the various tail sizes efficiently - TESTQ BX, BX - JE openSSEFinalize - CMPQ BX, $0x80 - JBE openAVX2Tail128 - CMPQ BX, $0x00000100 - JBE openAVX2Tail256 - CMPQ BX, $0x00000180 - JBE openAVX2Tail384 - JMP openAVX2Tail512 - -openAVX2192: - VMOVDQA Y0, Y5 - VMOVDQA Y14, Y9 - VMOVDQA Y12, Y13 - VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 - VMOVDQA Y0, Y6 - VMOVDQA Y14, Y10 - VMOVDQA Y12, Y8 - VMOVDQA Y4, Y2 - VMOVDQA Y1, Y15 - MOVQ $0x0000000a, R9 - -openAVX2192InnerCipherLoop: - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x0c, Y14, Y3 - VPSRLD $0x14, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x07, Y14, Y3 - VPSRLD $0x19, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x0c, Y9, Y3 - VPSRLD $0x14, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x07, Y9, Y3 - VPSRLD $0x19, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPALIGNR $0x04, Y14, Y14, Y14 - VPALIGNR $0x04, Y9, Y9, Y9 - VPALIGNR $0x08, Y12, Y12, Y12 - VPALIGNR $0x08, Y13, Y13, Y13 - VPALIGNR $0x0c, Y4, Y4, Y4 - VPALIGNR $0x0c, Y1, Y1, Y1 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x0c, Y14, Y3 - VPSRLD $0x14, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x07, Y14, Y3 - VPSRLD $0x19, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x0c, Y9, Y3 - VPSRLD $0x14, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x07, Y9, Y3 - VPSRLD $0x19, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPALIGNR $0x0c, Y14, Y14, Y14 - VPALIGNR $0x0c, Y9, Y9, Y9 - VPALIGNR $0x08, Y12, Y12, Y12 - VPALIGNR $0x08, Y13, Y13, Y13 - VPALIGNR $0x04, Y4, Y4, Y4 - VPALIGNR $0x04, Y1, Y1, Y1 - DECQ R9 - JNE openAVX2192InnerCipherLoop - VPADDD Y6, Y0, Y0 - VPADDD Y6, Y5, Y5 - VPADDD Y10, Y14, Y14 - VPADDD Y10, Y9, Y9 - VPADDD Y8, Y12, Y12 - VPADDD Y8, Y13, Y13 - VPADDD Y2, Y4, Y4 - VPADDD Y15, Y1, Y1 - VPERM2I128 $0x02, Y0, Y14, Y3 - - // Clamp and store poly key - VPAND ·polyClampMask<>+0(SB), Y3, Y3 - VMOVDQA Y3, (BP) - - // Stream for up to 192 bytes - VPERM2I128 $0x13, Y0, Y14, Y0 - VPERM2I128 $0x13, Y12, Y4, Y14 - VPERM2I128 $0x02, Y5, Y9, Y12 - VPERM2I128 $0x02, Y13, Y1, Y4 - VPERM2I128 $0x13, Y5, Y9, Y5 - VPERM2I128 $0x13, Y13, Y1, Y9 - -openAVX2ShortOpen: - // Hash - MOVQ ad_len+80(FP), R9 - CALL polyHashADInternal<>(SB) - -openAVX2ShortOpenLoop: - CMPQ BX, $0x20 - JB openAVX2ShortTail32 - SUBQ $0x20, BX - - // Load for hashing - ADDQ (SI), R10 - ADCQ 8(SI), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - ADDQ 16(SI), R10 - ADCQ 24(SI), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - - // Load for decryption - VPXOR (SI), Y0, Y0 - VMOVDQU Y0, (DI) - LEAQ 32(SI), SI - LEAQ 32(DI), DI - - // Shift stream left - VMOVDQA Y14, Y0 - VMOVDQA Y12, Y14 - VMOVDQA Y4, Y12 - VMOVDQA Y5, Y4 - VMOVDQA Y9, Y5 - VMOVDQA Y13, Y9 - VMOVDQA Y1, Y13 - VMOVDQA Y6, Y1 - VMOVDQA Y10, Y6 - JMP openAVX2ShortOpenLoop - -openAVX2ShortTail32: - CMPQ BX, $0x10 - VMOVDQA X0, X1 - JB openAVX2ShortDone - SUBQ $0x10, BX - - // Load for hashing - ADDQ (SI), R10 - ADCQ 8(SI), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - - // Load for decryption - VPXOR (SI), X0, X12 - VMOVDQU X12, (DI) - LEAQ 16(SI), SI - LEAQ 16(DI), DI - VPERM2I128 $0x11, Y0, Y0, Y0 - VMOVDQA X0, X1 - -openAVX2ShortDone: - VZEROUPPER - JMP openSSETail16 - -openAVX2320: - VMOVDQA Y0, Y5 - VMOVDQA Y14, Y9 - VMOVDQA Y12, Y13 - VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 - VMOVDQA Y0, Y6 - VMOVDQA Y14, Y10 - VMOVDQA Y12, Y8 - VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 - VMOVDQA Y14, Y7 - VMOVDQA Y12, Y11 - VMOVDQA Y4, Y15 - MOVQ $0x0000000a, R9 - -openAVX2320InnerCipherLoop: - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x0c, Y14, Y3 - VPSRLD $0x14, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x07, Y14, Y3 - VPSRLD $0x19, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x0c, Y9, Y3 - VPSRLD $0x14, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x07, Y9, Y3 - VPSRLD $0x19, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y10, Y6, Y6 - VPXOR Y6, Y2, Y2 - VPSHUFB ·rol16<>+0(SB), Y2, Y2 - VPADDD Y2, Y8, Y8 - VPXOR Y8, Y10, Y10 - VPSLLD $0x0c, Y10, Y3 - VPSRLD $0x14, Y10, Y10 - VPXOR Y3, Y10, Y10 - VPADDD Y10, Y6, Y6 - VPXOR Y6, Y2, Y2 - VPSHUFB ·rol8<>+0(SB), Y2, Y2 - VPADDD Y2, Y8, Y8 - VPXOR Y8, Y10, Y10 - VPSLLD $0x07, Y10, Y3 - VPSRLD $0x19, Y10, Y10 - VPXOR Y3, Y10, Y10 - VPALIGNR $0x04, Y14, Y14, Y14 - VPALIGNR $0x04, Y9, Y9, Y9 - VPALIGNR $0x04, Y10, Y10, Y10 - VPALIGNR $0x08, Y12, Y12, Y12 - VPALIGNR $0x08, Y13, Y13, Y13 - VPALIGNR $0x08, Y8, Y8, Y8 - VPALIGNR $0x0c, Y4, Y4, Y4 - VPALIGNR $0x0c, Y1, Y1, Y1 - VPALIGNR $0x0c, Y2, Y2, Y2 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x0c, Y14, Y3 - VPSRLD $0x14, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x07, Y14, Y3 - VPSRLD $0x19, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x0c, Y9, Y3 - VPSRLD $0x14, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x07, Y9, Y3 - VPSRLD $0x19, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y10, Y6, Y6 - VPXOR Y6, Y2, Y2 - VPSHUFB ·rol16<>+0(SB), Y2, Y2 - VPADDD Y2, Y8, Y8 - VPXOR Y8, Y10, Y10 - VPSLLD $0x0c, Y10, Y3 - VPSRLD $0x14, Y10, Y10 - VPXOR Y3, Y10, Y10 - VPADDD Y10, Y6, Y6 - VPXOR Y6, Y2, Y2 - VPSHUFB ·rol8<>+0(SB), Y2, Y2 - VPADDD Y2, Y8, Y8 - VPXOR Y8, Y10, Y10 - VPSLLD $0x07, Y10, Y3 - VPSRLD $0x19, Y10, Y10 - VPXOR Y3, Y10, Y10 - VPALIGNR $0x0c, Y14, Y14, Y14 - VPALIGNR $0x0c, Y9, Y9, Y9 - VPALIGNR $0x0c, Y10, Y10, Y10 - VPALIGNR $0x08, Y12, Y12, Y12 - VPALIGNR $0x08, Y13, Y13, Y13 - VPALIGNR $0x08, Y8, Y8, Y8 - VPALIGNR $0x04, Y4, Y4, Y4 - VPALIGNR $0x04, Y1, Y1, Y1 - VPALIGNR $0x04, Y2, Y2, Y2 - DECQ R9 - JNE openAVX2320InnerCipherLoop - VMOVDQA ·chacha20Constants<>+0(SB), Y3 - VPADDD Y3, Y0, Y0 - VPADDD Y3, Y5, Y5 - VPADDD Y3, Y6, Y6 - VPADDD Y7, Y14, Y14 - VPADDD Y7, Y9, Y9 - VPADDD Y7, Y10, Y10 - VPADDD Y11, Y12, Y12 - VPADDD Y11, Y13, Y13 - VPADDD Y11, Y8, Y8 - VMOVDQA ·avx2IncMask<>+0(SB), Y3 - VPADDD Y15, Y4, Y4 - VPADDD Y3, Y15, Y15 - VPADDD Y15, Y1, Y1 - VPADDD Y3, Y15, Y15 - VPADDD Y15, Y2, Y2 - - // Clamp and store poly key - VPERM2I128 $0x02, Y0, Y14, Y3 - VPAND ·polyClampMask<>+0(SB), Y3, Y3 - VMOVDQA Y3, (BP) - - // Stream for up to 320 bytes - VPERM2I128 $0x13, Y0, Y14, Y0 - VPERM2I128 $0x13, Y12, Y4, Y14 - VPERM2I128 $0x02, Y5, Y9, Y12 - VPERM2I128 $0x02, Y13, Y1, Y4 - VPERM2I128 $0x13, Y5, Y9, Y5 - VPERM2I128 $0x13, Y13, Y1, Y9 - VPERM2I128 $0x02, Y6, Y10, Y13 - VPERM2I128 $0x02, Y8, Y2, Y1 - VPERM2I128 $0x13, Y6, Y10, Y6 - VPERM2I128 $0x13, Y8, Y2, Y10 - JMP openAVX2ShortOpen - -openAVX2Tail128: - // Need to decrypt up to 128 bytes - prepare two blocks - VMOVDQA ·chacha20Constants<>+0(SB), Y5 - VMOVDQA 32(BP), Y9 - VMOVDQA 64(BP), Y13 - VMOVDQA 192(BP), Y1 - VPADDD ·avx2IncMask<>+0(SB), Y1, Y1 - VMOVDQA Y1, Y4 - XORQ R9, R9 - MOVQ BX, CX - ANDQ $-16, CX - TESTQ CX, CX - JE openAVX2Tail128LoopB - -openAVX2Tail128LoopA: - ADDQ (SI)(R9*1), R10 - ADCQ 8(SI)(R9*1), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - -openAVX2Tail128LoopB: - ADDQ $0x10, R9 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x0c, Y9, Y3 - VPSRLD $0x14, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x07, Y9, Y3 - VPSRLD $0x19, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPALIGNR $0x04, Y9, Y9, Y9 - VPALIGNR $0x08, Y13, Y13, Y13 - VPALIGNR $0x0c, Y1, Y1, Y1 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x0c, Y9, Y3 - VPSRLD $0x14, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x07, Y9, Y3 - VPSRLD $0x19, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPALIGNR $0x0c, Y9, Y9, Y9 - VPALIGNR $0x08, Y13, Y13, Y13 - VPALIGNR $0x04, Y1, Y1, Y1 - CMPQ R9, CX - JB openAVX2Tail128LoopA - CMPQ R9, $0xa0 - JNE openAVX2Tail128LoopB - VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 - VPADDD 32(BP), Y9, Y9 - VPADDD 64(BP), Y13, Y13 - VPADDD Y4, Y1, Y1 - VPERM2I128 $0x02, Y5, Y9, Y0 - VPERM2I128 $0x02, Y13, Y1, Y14 - VPERM2I128 $0x13, Y5, Y9, Y12 - VPERM2I128 $0x13, Y13, Y1, Y4 - -openAVX2TailLoop: - CMPQ BX, $0x20 - JB openAVX2Tail - SUBQ $0x20, BX - - // Load for decryption - VPXOR (SI), Y0, Y0 - VMOVDQU Y0, (DI) - LEAQ 32(SI), SI - LEAQ 32(DI), DI - VMOVDQA Y14, Y0 - VMOVDQA Y12, Y14 - VMOVDQA Y4, Y12 - JMP openAVX2TailLoop - -openAVX2Tail: - CMPQ BX, $0x10 - VMOVDQA X0, X1 - JB openAVX2TailDone - SUBQ $0x10, BX - - // Load for decryption - VPXOR (SI), X0, X12 - VMOVDQU X12, (DI) - LEAQ 16(SI), SI - LEAQ 16(DI), DI - VPERM2I128 $0x11, Y0, Y0, Y0 - VMOVDQA X0, X1 - -openAVX2TailDone: - VZEROUPPER - JMP openSSETail16 - -openAVX2Tail256: - VMOVDQA ·chacha20Constants<>+0(SB), Y0 - VMOVDQA Y0, Y5 - VMOVDQA 32(BP), Y14 - VMOVDQA Y14, Y9 - VMOVDQA 64(BP), Y12 - VMOVDQA Y12, Y13 - VMOVDQA 192(BP), Y4 - VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 - VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 - VMOVDQA Y4, Y7 - VMOVDQA Y1, Y11 - - // Compute the number of iterations that will hash data - MOVQ BX, 224(BP) - MOVQ BX, CX - SUBQ $0x80, CX - SHRQ $0x04, CX - MOVQ $0x0000000a, R9 - CMPQ CX, $0x0a - CMOVQGT R9, CX - MOVQ SI, BX - XORQ R9, R9 - -openAVX2Tail256LoopA: - ADDQ (BX), R10 - ADCQ 8(BX), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - LEAQ 16(BX), BX - -openAVX2Tail256LoopB: - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x0c, Y14, Y3 - VPSRLD $0x14, Y14, Y14 - VPXOR Y3, Y14, Y14 +openAVX2InternalLoop: + ADDQ (SI)(CX*1), R10 + ADCQ 8(SI)(CX*1), R11 + ADCQ $0x01, R12 VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x07, Y14, Y3 - VPSRLD $0x19, Y14, Y14 - VPXOR Y3, Y14, Y14 VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPXOR Y0, Y4, Y4 VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPADDD Y4, Y12, Y12 VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 VPXOR Y13, Y9, Y9 - VPSLLD $0x0c, Y9, Y3 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 VPSRLD $0x14, Y9, Y9 - VPXOR Y3, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y14, Y0, Y0 VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + ADDQ 16(SI)(CX*1), R10 + ADCQ 24(SI)(CX*1), R11 + ADCQ $0x01, R12 + VPADDD Y4, Y12, Y12 VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPXOR Y12, Y14, Y14 VPXOR Y13, Y9, Y9 - VPSLLD $0x07, Y9, Y3 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 VPSRLD $0x19, Y9, Y9 - VPXOR Y3, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 VPALIGNR $0x04, Y14, Y14, Y14 VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x04, Y11, Y11, Y11 VPALIGNR $0x08, Y12, Y12, Y12 VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 VPALIGNR $0x0c, Y4, Y4, Y4 VPALIGNR $0x0c, Y1, Y1, Y1 - INCQ R9 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x0c, Y14, Y3 - VPSRLD $0x14, Y14, Y14 - VPXOR Y3, Y14, Y14 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x0c, Y3, Y3, Y3 VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x07, Y14, Y3 - VPSRLD $0x19, Y14, Y14 - VPXOR Y3, Y14, Y14 VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VPXOR Y0, Y4, Y4 VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y4, Y12, Y12 VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 VPXOR Y13, Y9, Y9 - VPSLLD $0x0c, Y9, Y3 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + ADDQ 32(SI)(CX*1), R10 + ADCQ 40(SI)(CX*1), R11 + ADCQ $0x01, R12 + LEAQ 48(CX), CX + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 VPSRLD $0x14, Y9, Y9 - VPXOR Y3, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + VPADDD Y14, Y0, Y0 VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + VPXOR Y12, Y14, Y14 VPXOR Y13, Y9, Y9 - VPSLLD $0x07, Y9, Y3 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 VPSRLD $0x19, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPALIGNR $0x0c, Y14, Y14, Y14 - VPALIGNR $0x0c, Y9, Y9, Y9 - VPALIGNR $0x08, Y12, Y12, Y12 - VPALIGNR $0x08, Y13, Y13, Y13 - VPALIGNR $0x04, Y4, Y4, Y4 - VPALIGNR $0x04, Y1, Y1, Y1 - CMPQ R9, CX - JB openAVX2Tail256LoopA - CMPQ R9, $0x0a - JNE openAVX2Tail256LoopB - MOVQ BX, R9 - SUBQ SI, BX - MOVQ BX, CX - MOVQ 224(BP), BX - -openAVX2Tail256Hash: - ADDQ $0x10, CX - CMPQ CX, BX - JGT openAVX2Tail256HashEnd - ADDQ (R9), R10 - ADCQ 8(R9), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - LEAQ 16(R9), R9 - JMP openAVX2Tail256Hash + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x04, Y3, Y3, Y3 + CMPQ CX, $0x000001e0 + JNE openAVX2InternalLoop + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VMOVDQA Y15, 224(BP) -openAVX2Tail256HashEnd: - VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 - VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 - VPADDD 32(BP), Y14, Y14 - VPADDD 32(BP), Y9, Y9 - VPADDD 64(BP), Y12, Y12 - VPADDD 64(BP), Y13, Y13 - VPADDD Y7, Y4, Y4 - VPADDD Y11, Y1, Y1 - VPERM2I128 $0x02, Y0, Y14, Y6 - VPERM2I128 $0x02, Y12, Y4, Y10 - VPERM2I128 $0x13, Y0, Y14, Y8 - VPERM2I128 $0x13, Y12, Y4, Y2 + // We only hashed 480 of the 512 bytes available - hash the remaining 32 here + ADDQ 480(SI), R10 + ADCQ 488(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPERM2I128 $0x02, Y0, Y14, Y15 + VPERM2I128 $0x13, Y0, Y14, Y14 + VPERM2I128 $0x02, Y12, Y4, Y0 + VPERM2I128 $0x13, Y12, Y4, Y12 + VPXOR (SI), Y15, Y15 + VPXOR 32(SI), Y0, Y0 + VPXOR 64(SI), Y14, Y14 + VPXOR 96(SI), Y12, Y12 + VMOVDQU Y15, (DI) + VMOVDQU Y0, 32(DI) + VMOVDQU Y14, 64(DI) + VMOVDQU Y12, 96(DI) VPERM2I128 $0x02, Y5, Y9, Y0 VPERM2I128 $0x02, Y13, Y1, Y14 VPERM2I128 $0x13, Y5, Y9, Y12 VPERM2I128 $0x13, Y13, Y1, Y4 - VPXOR (SI), Y6, Y6 - VPXOR 32(SI), Y10, Y10 - VPXOR 64(SI), Y8, Y8 - VPXOR 96(SI), Y2, Y2 - VMOVDQU Y6, (DI) - VMOVDQU Y10, 32(DI) - VMOVDQU Y8, 64(DI) - VMOVDQU Y2, 96(DI) - LEAQ 128(SI), SI - LEAQ 128(DI), DI - SUBQ $0x80, BX - JMP openAVX2TailLoop + VPXOR 128(SI), Y0, Y0 + VPXOR 160(SI), Y14, Y14 + VPXOR 192(SI), Y12, Y12 + VPXOR 224(SI), Y4, Y4 + VMOVDQU Y0, 128(DI) + VMOVDQU Y14, 160(DI) + VMOVDQU Y12, 192(DI) + VMOVDQU Y4, 224(DI) -openAVX2Tail384: - // Need to decrypt up to 384 bytes - prepare six blocks - VMOVDQA ·chacha20Constants<>+0(SB), Y0 - VMOVDQA Y0, Y5 - VMOVDQA Y0, Y6 - VMOVDQA 32(BP), Y14 - VMOVDQA Y14, Y9 - VMOVDQA Y14, Y10 - VMOVDQA 64(BP), Y12 - VMOVDQA Y12, Y13 - VMOVDQA Y12, Y8 - VMOVDQA 192(BP), Y4 - VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 - VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 - VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 - VMOVDQA Y4, 96(BP) - VMOVDQA Y1, 128(BP) - VMOVDQA Y2, 160(BP) + // and here + ADDQ 496(SI), R10 + ADCQ 504(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 256(SI), Y0, Y0 + VPXOR 288(SI), Y14, Y14 + VPXOR 320(SI), Y12, Y12 + VPXOR 352(SI), Y4, Y4 + VMOVDQU Y0, 256(DI) + VMOVDQU Y14, 288(DI) + VMOVDQU Y12, 320(DI) + VMOVDQU Y4, 352(DI) + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, 224(BP), Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, 224(BP), Y3, Y4 + VPXOR 384(SI), Y0, Y0 + VPXOR 416(SI), Y14, Y14 + VPXOR 448(SI), Y12, Y12 + VPXOR 480(SI), Y4, Y4 + VMOVDQU Y0, 384(DI) + VMOVDQU Y14, 416(DI) + VMOVDQU Y12, 448(DI) + VMOVDQU Y4, 480(DI) + LEAQ 512(SI), SI + LEAQ 512(DI), DI + SUBQ $0x00000200, BX + JMP openAVX2MainLoop - // Compute the number of iterations that will hash two blocks of data - MOVQ BX, 224(BP) - MOVQ BX, CX - SUBQ $0x00000100, CX - SHRQ $0x04, CX - ADDQ $0x06, CX - MOVQ $0x0000000a, R9 - CMPQ CX, $0x0a - CMOVQGT R9, CX - MOVQ SI, BX - XORQ R9, R9 +openAVX2MainLoopDone: + // Handle the various tail sizes efficiently + TESTQ BX, BX + JE openSSEFinalize + CMPQ BX, $0x80 + JBE openAVX2Tail128 + CMPQ BX, $0x00000100 + JBE openAVX2Tail256 + CMPQ BX, $0x00000180 + JBE openAVX2Tail384 + JMP openAVX2Tail512 -openAVX2Tail384LoopB: - ADDQ (BX), R10 - ADCQ 8(BX), R11 +openSSEFinalize: + // Hash in the PT, AAD lengths + ADDQ ad_len+80(FP), R10 + ADCQ src_len+56(FP), R11 ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 IMULQ R12, R15 - MULXQ R11, AX, DX ADDQ AX, R14 ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 ADCQ DX, R8 MOVQ R13, R10 MOVQ R14, R11 @@ -4075,174 +797,261 @@ openAVX2Tail384LoopB: ADDQ R15, R10 ADCQ R8, R11 ADCQ $0x00, R12 - LEAQ 16(BX), BX -openAVX2Tail384LoopA: - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x0c, Y14, Y3 - VPSRLD $0x14, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x07, Y14, Y3 - VPSRLD $0x19, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x0c, Y9, Y3 - VPSRLD $0x14, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x07, Y9, Y3 - VPSRLD $0x19, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y10, Y6, Y6 - VPXOR Y6, Y2, Y2 - VPSHUFB ·rol16<>+0(SB), Y2, Y2 - VPADDD Y2, Y8, Y8 - VPXOR Y8, Y10, Y10 - VPSLLD $0x0c, Y10, Y3 - VPSRLD $0x14, Y10, Y10 - VPXOR Y3, Y10, Y10 - VPADDD Y10, Y6, Y6 - VPXOR Y6, Y2, Y2 - VPSHUFB ·rol8<>+0(SB), Y2, Y2 - VPADDD Y2, Y8, Y8 - VPXOR Y8, Y10, Y10 - VPSLLD $0x07, Y10, Y3 - VPSRLD $0x19, Y10, Y10 - VPXOR Y3, Y10, Y10 - VPALIGNR $0x04, Y14, Y14, Y14 - VPALIGNR $0x04, Y9, Y9, Y9 - VPALIGNR $0x04, Y10, Y10, Y10 - VPALIGNR $0x08, Y12, Y12, Y12 - VPALIGNR $0x08, Y13, Y13, Y13 - VPALIGNR $0x08, Y8, Y8, Y8 - VPALIGNR $0x0c, Y4, Y4, Y4 - VPALIGNR $0x0c, Y1, Y1, Y1 - VPALIGNR $0x0c, Y2, Y2, Y2 - ADDQ (BX), R10 - ADCQ 8(BX), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - LEAQ 16(BX), BX - INCQ R9 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x0c, Y14, Y3 - VPSRLD $0x14, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y14, Y0, Y0 - VPXOR Y0, Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPADDD Y4, Y12, Y12 - VPXOR Y12, Y14, Y14 - VPSLLD $0x07, Y14, Y3 - VPSRLD $0x19, Y14, Y14 - VPXOR Y3, Y14, Y14 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x0c, Y9, Y3 - VPSRLD $0x14, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y9, Y5, Y5 - VPXOR Y5, Y1, Y1 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 - VPADDD Y1, Y13, Y13 - VPXOR Y13, Y9, Y9 - VPSLLD $0x07, Y9, Y3 - VPSRLD $0x19, Y9, Y9 - VPXOR Y3, Y9, Y9 - VPADDD Y10, Y6, Y6 - VPXOR Y6, Y2, Y2 - VPSHUFB ·rol16<>+0(SB), Y2, Y2 - VPADDD Y2, Y8, Y8 - VPXOR Y8, Y10, Y10 - VPSLLD $0x0c, Y10, Y3 - VPSRLD $0x14, Y10, Y10 - VPXOR Y3, Y10, Y10 - VPADDD Y10, Y6, Y6 - VPXOR Y6, Y2, Y2 - VPSHUFB ·rol8<>+0(SB), Y2, Y2 - VPADDD Y2, Y8, Y8 - VPXOR Y8, Y10, Y10 - VPSLLD $0x07, Y10, Y3 - VPSRLD $0x19, Y10, Y10 - VPXOR Y3, Y10, Y10 - VPALIGNR $0x0c, Y14, Y14, Y14 - VPALIGNR $0x0c, Y9, Y9, Y9 - VPALIGNR $0x0c, Y10, Y10, Y10 - VPALIGNR $0x08, Y12, Y12, Y12 - VPALIGNR $0x08, Y13, Y13, Y13 - VPALIGNR $0x08, Y8, Y8, Y8 - VPALIGNR $0x04, Y4, Y4, Y4 - VPALIGNR $0x04, Y1, Y1, Y1 - VPALIGNR $0x04, Y2, Y2, Y2 - CMPQ R9, CX - JB openAVX2Tail384LoopB - CMPQ R9, $0x0a - JNE openAVX2Tail384LoopA - MOVQ BX, R9 - SUBQ SI, BX - MOVQ BX, CX - MOVQ 224(BP), BX + // Final reduce + MOVQ R10, R13 + MOVQ R11, R14 + MOVQ R12, R15 + SUBQ $-5, R10 + SBBQ $-1, R11 + SBBQ $0x03, R12 + CMOVQCS R13, R10 + CMOVQCS R14, R11 + CMOVQCS R15, R12 + + // Add in the "s" part of the key + ADDQ 16(BP), R10 + ADCQ 24(BP), R11 + + // Finally, constant time compare to the tag at the end of the message + XORQ AX, AX + MOVQ $0x00000001, DX + XORQ (SI), R10 + XORQ 8(SI), R11 + ORQ R11, R10 + CMOVQEQ DX, AX + + // Return true iff tags are equal + MOVB AX, ret+96(FP) + RET + +openSSETail16: + TESTQ BX, BX + JE openSSEFinalize + + // We can safely load the CT from the end, because it is padded with the MAC + MOVQ BX, R9 + SHLQ $0x04, R9 + LEAQ ·andMask<>+0(SB), R13 + MOVOU (SI), X12 + ADDQ BX, SI + PAND -16(R13)(R9*1), X12 + MOVO X12, 64(BP) + MOVQ X12, R13 + MOVQ 72(BP), R14 + PXOR X1, X12 + + // We can only store one byte at a time, since plaintext can be shorter than 16 bytes +openSSETail16Store: + MOVQ X12, R8 + MOVB R8, (DI) + PSRLDQ $0x01, X12 + INCQ DI + DECQ BX + JNE openSSETail16Store + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + JMP openSSEFinalize -openAVX2Tail384Hash: - ADDQ $0x10, CX - CMPQ CX, BX - JGT openAVX2Tail384HashEnd - ADDQ (R9), R10 - ADCQ 8(R9), R11 +openAVX2192: + VMOVDQA Y0, Y5 + VMOVDQA Y14, Y9 + VMOVDQA Y12, Y13 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y0, Y6 + VMOVDQA Y14, Y10 + VMOVDQA Y12, Y8 + VMOVDQA Y4, Y2 + VMOVDQA Y1, Y15 + MOVQ $0x0000000a, R9 + +openAVX2192InnerCipherLoop: + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + DECQ R9 + JNE openAVX2192InnerCipherLoop + VPADDD Y6, Y0, Y0 + VPADDD Y6, Y5, Y5 + VPADDD Y10, Y14, Y14 + VPADDD Y10, Y9, Y9 + VPADDD Y8, Y12, Y12 + VPADDD Y8, Y13, Y13 + VPADDD Y2, Y4, Y4 + VPADDD Y15, Y1, Y1 + VPERM2I128 $0x02, Y0, Y14, Y3 + + // Clamp and store poly key + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) + + // Stream for up to 192 bytes + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 + VPERM2I128 $0x02, Y5, Y9, Y12 + VPERM2I128 $0x02, Y13, Y1, Y4 + VPERM2I128 $0x13, Y5, Y9, Y5 + VPERM2I128 $0x13, Y13, Y1, Y9 + +openAVX2ShortOpen: + // Hash + MOVQ ad_len+80(FP), R9 + CALL polyHashADInternal<>(SB) + +openAVX2ShortOpenLoop: + CMPQ BX, $0x20 + JB openAVX2ShortTail32 + SUBQ $0x20, BX + + // Load for hashing + ADDQ (SI), R10 + ADCQ 8(SI), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ 16(SI), R10 + ADCQ 24(SI), R11 ADCQ $0x01, R12 MOVQ (BP), DX MOVQ DX, R15 @@ -4275,83 +1084,34 @@ openAVX2Tail384Hash: ADDQ R15, R10 ADCQ R8, R11 ADCQ $0x00, R12 - LEAQ 16(R9), R9 - JMP openAVX2Tail384Hash -openAVX2Tail384HashEnd: - VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 - VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 - VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 - VPADDD 32(BP), Y14, Y14 - VPADDD 32(BP), Y9, Y9 - VPADDD 32(BP), Y10, Y10 - VPADDD 64(BP), Y12, Y12 - VPADDD 64(BP), Y13, Y13 - VPADDD 64(BP), Y8, Y8 - VPADDD 96(BP), Y4, Y4 - VPADDD 128(BP), Y1, Y1 - VPADDD 160(BP), Y2, Y2 - VPERM2I128 $0x02, Y0, Y14, Y3 - VPERM2I128 $0x02, Y12, Y4, Y7 - VPERM2I128 $0x13, Y0, Y14, Y11 - VPERM2I128 $0x13, Y12, Y4, Y15 - VPXOR (SI), Y3, Y3 - VPXOR 32(SI), Y7, Y7 - VPXOR 64(SI), Y11, Y11 - VPXOR 96(SI), Y15, Y15 - VMOVDQU Y3, (DI) - VMOVDQU Y7, 32(DI) - VMOVDQU Y11, 64(DI) - VMOVDQU Y15, 96(DI) - VPERM2I128 $0x02, Y5, Y9, Y3 - VPERM2I128 $0x02, Y13, Y1, Y7 - VPERM2I128 $0x13, Y5, Y9, Y11 - VPERM2I128 $0x13, Y13, Y1, Y15 - VPXOR 128(SI), Y3, Y3 - VPXOR 160(SI), Y7, Y7 - VPXOR 192(SI), Y11, Y11 - VPXOR 224(SI), Y15, Y15 - VMOVDQU Y3, 128(DI) - VMOVDQU Y7, 160(DI) - VMOVDQU Y11, 192(DI) - VMOVDQU Y15, 224(DI) - VPERM2I128 $0x02, Y6, Y10, Y0 - VPERM2I128 $0x02, Y8, Y2, Y14 - VPERM2I128 $0x13, Y6, Y10, Y12 - VPERM2I128 $0x13, Y8, Y2, Y4 - LEAQ 256(SI), SI - LEAQ 256(DI), DI - SUBQ $0x00000100, BX - JMP openAVX2TailLoop + // Load for decryption + VPXOR (SI), Y0, Y0 + VMOVDQU Y0, (DI) + LEAQ 32(SI), SI + LEAQ 32(DI), DI -openAVX2Tail512: - VMOVDQU ·chacha20Constants<>+0(SB), Y0 - VMOVDQA Y0, Y5 - VMOVDQA Y0, Y6 - VMOVDQA Y0, Y7 - VMOVDQA 32(BP), Y14 - VMOVDQA Y14, Y9 - VMOVDQA Y14, Y10 - VMOVDQA Y14, Y11 - VMOVDQA 64(BP), Y12 - VMOVDQA Y12, Y13 - VMOVDQA Y12, Y8 - VMOVDQA Y12, Y15 - VMOVDQA 192(BP), Y4 - VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 - VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 - VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 - VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 - VMOVDQA Y4, 96(BP) - VMOVDQA Y1, 128(BP) - VMOVDQA Y2, 160(BP) - VMOVDQA Y3, 192(BP) - XORQ CX, CX - MOVQ SI, R9 + // Shift stream left + VMOVDQA Y14, Y0 + VMOVDQA Y12, Y14 + VMOVDQA Y4, Y12 + VMOVDQA Y5, Y4 + VMOVDQA Y9, Y5 + VMOVDQA Y13, Y9 + VMOVDQA Y1, Y13 + VMOVDQA Y6, Y1 + VMOVDQA Y10, Y6 + JMP openAVX2ShortOpenLoop + +openAVX2ShortTail32: + CMPQ BX, $0x10 + VMOVDQA X0, X1 + JB openAVX2ShortDone + SUBQ $0x10, BX -openAVX2Tail512LoopB: - ADDQ (R9), R10 - ADCQ 8(R9), R11 + // Load for hashing + ADDQ (SI), R10 + ADCQ 8(SI), R11 ADCQ $0x01, R12 MOVQ (BP), DX MOVQ DX, R15 @@ -4384,252 +1144,202 @@ openAVX2Tail512LoopB: ADDQ R15, R10 ADCQ R8, R11 ADCQ $0x00, R12 - LEAQ 16(R9), R9 -openAVX2Tail512LoopA: + // Load for decryption + VPXOR (SI), X0, X12 + VMOVDQU X12, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI + VPERM2I128 $0x11, Y0, Y0, Y0 + VMOVDQA X0, X1 + +openAVX2ShortDone: + VZEROUPPER + JMP openSSETail16 + +openAVX2320: + VMOVDQA Y0, Y5 + VMOVDQA Y14, Y9 + VMOVDQA Y12, Y13 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y0, Y6 + VMOVDQA Y14, Y10 + VMOVDQA Y12, Y8 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y14, Y7 + VMOVDQA Y12, Y11 + VMOVDQA Y4, Y15 + MOVQ $0x0000000a, R9 + +openAVX2320InnerCipherLoop: VPADDD Y14, Y0, Y0 - VPADDD Y9, Y5, Y5 - VPADDD Y10, Y6, Y6 - VPADDD Y11, Y7, Y7 VPXOR Y0, Y4, Y4 - VPXOR Y5, Y1, Y1 - VPXOR Y6, Y2, Y2 - VPXOR Y7, Y3, Y3 VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y2, Y2 - VPSHUFB ·rol16<>+0(SB), Y3, Y3 VPADDD Y4, Y12, Y12 - VPADDD Y1, Y13, Y13 - VPADDD Y2, Y8, Y8 - VPADDD Y3, Y15, Y15 VPXOR Y12, Y14, Y14 - VPXOR Y13, Y9, Y9 - VPXOR Y8, Y10, Y10 - VPXOR Y15, Y11, Y11 - VMOVDQA Y15, 224(BP) - VPSLLD $0x0c, Y14, Y15 + VPSLLD $0x0c, Y14, Y3 VPSRLD $0x14, Y14, Y14 - VPXOR Y15, Y14, Y14 - VPSLLD $0x0c, Y9, Y15 - VPSRLD $0x14, Y9, Y9 - VPXOR Y15, Y9, Y9 - VPSLLD $0x0c, Y10, Y15 - VPSRLD $0x14, Y10, Y10 - VPXOR Y15, Y10, Y10 - VPSLLD $0x0c, Y11, Y15 - VPSRLD $0x14, Y11, Y11 - VPXOR Y15, Y11, Y11 - VMOVDQA 224(BP), Y15 - ADDQ (R9), R10 - ADCQ 8(R9), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 + VPXOR Y3, Y14, Y14 VPADDD Y14, Y0, Y0 - VPADDD Y9, Y5, Y5 - VPADDD Y10, Y6, Y6 - VPADDD Y11, Y7, Y7 VPXOR Y0, Y4, Y4 - VPXOR Y5, Y1, Y1 - VPXOR Y6, Y2, Y2 - VPXOR Y7, Y3, Y3 VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 - VPSHUFB ·rol8<>+0(SB), Y2, Y2 - VPSHUFB ·rol8<>+0(SB), Y3, Y3 VPADDD Y4, Y12, Y12 - VPADDD Y1, Y13, Y13 - VPADDD Y2, Y8, Y8 - VPADDD Y3, Y15, Y15 VPXOR Y12, Y14, Y14 - VPXOR Y13, Y9, Y9 - VPXOR Y8, Y10, Y10 - VPXOR Y15, Y11, Y11 - VMOVDQA Y15, 224(BP) - VPSLLD $0x07, Y14, Y15 + VPSLLD $0x07, Y14, Y3 VPSRLD $0x19, Y14, Y14 - VPXOR Y15, Y14, Y14 - VPSLLD $0x07, Y9, Y15 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 VPSRLD $0x19, Y9, Y9 - VPXOR Y15, Y9, Y9 - VPSLLD $0x07, Y10, Y15 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 VPSRLD $0x19, Y10, Y10 - VPXOR Y15, Y10, Y10 - VPSLLD $0x07, Y11, Y15 - VPSRLD $0x19, Y11, Y11 - VPXOR Y15, Y11, Y11 - VMOVDQA 224(BP), Y15 + VPXOR Y3, Y10, Y10 VPALIGNR $0x04, Y14, Y14, Y14 VPALIGNR $0x04, Y9, Y9, Y9 VPALIGNR $0x04, Y10, Y10, Y10 - VPALIGNR $0x04, Y11, Y11, Y11 VPALIGNR $0x08, Y12, Y12, Y12 VPALIGNR $0x08, Y13, Y13, Y13 VPALIGNR $0x08, Y8, Y8, Y8 - VPALIGNR $0x08, Y15, Y15, Y15 VPALIGNR $0x0c, Y4, Y4, Y4 VPALIGNR $0x0c, Y1, Y1, Y1 VPALIGNR $0x0c, Y2, Y2, Y2 - VPALIGNR $0x0c, Y3, Y3, Y3 VPADDD Y14, Y0, Y0 - VPADDD Y9, Y5, Y5 - VPADDD Y10, Y6, Y6 - VPADDD Y11, Y7, Y7 VPXOR Y0, Y4, Y4 - VPXOR Y5, Y1, Y1 - VPXOR Y6, Y2, Y2 - VPXOR Y7, Y3, Y3 VPSHUFB ·rol16<>+0(SB), Y4, Y4 - VPSHUFB ·rol16<>+0(SB), Y1, Y1 - VPSHUFB ·rol16<>+0(SB), Y2, Y2 - VPSHUFB ·rol16<>+0(SB), Y3, Y3 VPADDD Y4, Y12, Y12 - VPADDD Y1, Y13, Y13 - VPADDD Y2, Y8, Y8 - VPADDD Y3, Y15, Y15 VPXOR Y12, Y14, Y14 - VPXOR Y13, Y9, Y9 - VPXOR Y8, Y10, Y10 - VPXOR Y15, Y11, Y11 - ADDQ 16(R9), R10 - ADCQ 24(R9), R11 - ADCQ $0x01, R12 - MOVQ (BP), DX - MOVQ DX, R15 - MULXQ R10, R13, R14 - IMULQ R12, R15 - MULXQ R11, AX, DX - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), DX - MULXQ R10, R10, AX - ADDQ R10, R14 - MULXQ R11, R11, R8 - ADCQ R11, R15 - ADCQ $0x00, R8 - IMULQ R12, DX - ADDQ AX, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - LEAQ 32(R9), R9 - VMOVDQA Y15, 224(BP) - VPSLLD $0x0c, Y14, Y15 + VPSLLD $0x0c, Y14, Y3 VPSRLD $0x14, Y14, Y14 - VPXOR Y15, Y14, Y14 - VPSLLD $0x0c, Y9, Y15 - VPSRLD $0x14, Y9, Y9 - VPXOR Y15, Y9, Y9 - VPSLLD $0x0c, Y10, Y15 - VPSRLD $0x14, Y10, Y10 - VPXOR Y15, Y10, Y10 - VPSLLD $0x0c, Y11, Y15 - VPSRLD $0x14, Y11, Y11 - VPXOR Y15, Y11, Y11 - VMOVDQA 224(BP), Y15 + VPXOR Y3, Y14, Y14 VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 VPADDD Y10, Y6, Y6 - VPADDD Y11, Y7, Y7 - VPXOR Y0, Y4, Y4 - VPXOR Y5, Y1, Y1 VPXOR Y6, Y2, Y2 - VPXOR Y7, Y3, Y3 - VPSHUFB ·rol8<>+0(SB), Y4, Y4 - VPSHUFB ·rol8<>+0(SB), Y1, Y1 VPSHUFB ·rol8<>+0(SB), Y2, Y2 - VPSHUFB ·rol8<>+0(SB), Y3, Y3 - VPADDD Y4, Y12, Y12 - VPADDD Y1, Y13, Y13 VPADDD Y2, Y8, Y8 - VPADDD Y3, Y15, Y15 - VPXOR Y12, Y14, Y14 - VPXOR Y13, Y9, Y9 VPXOR Y8, Y10, Y10 - VPXOR Y15, Y11, Y11 - VMOVDQA Y15, 224(BP) - VPSLLD $0x07, Y14, Y15 - VPSRLD $0x19, Y14, Y14 - VPXOR Y15, Y14, Y14 - VPSLLD $0x07, Y9, Y15 - VPSRLD $0x19, Y9, Y9 - VPXOR Y15, Y9, Y9 - VPSLLD $0x07, Y10, Y15 + VPSLLD $0x07, Y10, Y3 VPSRLD $0x19, Y10, Y10 - VPXOR Y15, Y10, Y10 - VPSLLD $0x07, Y11, Y15 - VPSRLD $0x19, Y11, Y11 - VPXOR Y15, Y11, Y11 - VMOVDQA 224(BP), Y15 + VPXOR Y3, Y10, Y10 VPALIGNR $0x0c, Y14, Y14, Y14 VPALIGNR $0x0c, Y9, Y9, Y9 VPALIGNR $0x0c, Y10, Y10, Y10 - VPALIGNR $0x0c, Y11, Y11, Y11 VPALIGNR $0x08, Y12, Y12, Y12 VPALIGNR $0x08, Y13, Y13, Y13 VPALIGNR $0x08, Y8, Y8, Y8 - VPALIGNR $0x08, Y15, Y15, Y15 VPALIGNR $0x04, Y4, Y4, Y4 VPALIGNR $0x04, Y1, Y1, Y1 VPALIGNR $0x04, Y2, Y2, Y2 - VPALIGNR $0x04, Y3, Y3, Y3 - INCQ CX - CMPQ CX, $0x04 - JLT openAVX2Tail512LoopB - CMPQ CX, $0x0a - JNE openAVX2Tail512LoopA - MOVQ BX, CX - SUBQ $0x00000180, CX - ANDQ $-16, CX + DECQ R9 + JNE openAVX2320InnerCipherLoop + VMOVDQA ·chacha20Constants<>+0(SB), Y3 + VPADDD Y3, Y0, Y0 + VPADDD Y3, Y5, Y5 + VPADDD Y3, Y6, Y6 + VPADDD Y7, Y14, Y14 + VPADDD Y7, Y9, Y9 + VPADDD Y7, Y10, Y10 + VPADDD Y11, Y12, Y12 + VPADDD Y11, Y13, Y13 + VPADDD Y11, Y8, Y8 + VMOVDQA ·avx2IncMask<>+0(SB), Y3 + VPADDD Y15, Y4, Y4 + VPADDD Y3, Y15, Y15 + VPADDD Y15, Y1, Y1 + VPADDD Y3, Y15, Y15 + VPADDD Y15, Y2, Y2 -openAVX2Tail512HashLoop: - TESTQ CX, CX - JE openAVX2Tail512HashEnd - ADDQ (R9), R10 - ADCQ 8(R9), R11 + // Clamp and store poly key + VPERM2I128 $0x02, Y0, Y14, Y3 + VPAND ·polyClampMask<>+0(SB), Y3, Y3 + VMOVDQA Y3, (BP) + + // Stream for up to 320 bytes + VPERM2I128 $0x13, Y0, Y14, Y0 + VPERM2I128 $0x13, Y12, Y4, Y14 + VPERM2I128 $0x02, Y5, Y9, Y12 + VPERM2I128 $0x02, Y13, Y1, Y4 + VPERM2I128 $0x13, Y5, Y9, Y5 + VPERM2I128 $0x13, Y13, Y1, Y9 + VPERM2I128 $0x02, Y6, Y10, Y13 + VPERM2I128 $0x02, Y8, Y2, Y1 + VPERM2I128 $0x13, Y6, Y10, Y6 + VPERM2I128 $0x13, Y8, Y2, Y10 + JMP openAVX2ShortOpen + +openAVX2Tail128: + // Need to decrypt up to 128 bytes - prepare two blocks + VMOVDQA ·chacha20Constants<>+0(SB), Y5 + VMOVDQA 32(BP), Y9 + VMOVDQA 64(BP), Y13 + VMOVDQA 192(BP), Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y1 + VMOVDQA Y1, Y4 + XORQ R9, R9 + MOVQ BX, CX + ANDQ $-16, CX + TESTQ CX, CX + JE openAVX2Tail128LoopB + +openAVX2Tail128LoopA: + ADDQ (SI)(R9*1), R10 + ADCQ 8(SI)(R9*1), R11 ADCQ $0x01, R12 MOVQ (BP), DX MOVQ DX, R15 @@ -4662,884 +1372,137 @@ openAVX2Tail512HashLoop: ADDQ R15, R10 ADCQ R8, R11 ADCQ $0x00, R12 - LEAQ 16(R9), R9 - SUBQ $0x10, CX - JMP openAVX2Tail512HashLoop -openAVX2Tail512HashEnd: - VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 +openAVX2Tail128LoopB: + ADDQ $0x10, R9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y1, Y1, Y1 + CMPQ R9, CX + JB openAVX2Tail128LoopA + CMPQ R9, $0xa0 + JNE openAVX2Tail128LoopB VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 - VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 - VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 - VPADDD 32(BP), Y14, Y14 VPADDD 32(BP), Y9, Y9 - VPADDD 32(BP), Y10, Y10 - VPADDD 32(BP), Y11, Y11 - VPADDD 64(BP), Y12, Y12 VPADDD 64(BP), Y13, Y13 - VPADDD 64(BP), Y8, Y8 - VPADDD 64(BP), Y15, Y15 - VPADDD 96(BP), Y4, Y4 - VPADDD 128(BP), Y1, Y1 - VPADDD 160(BP), Y2, Y2 - VPADDD 192(BP), Y3, Y3 - VMOVDQA Y15, 224(BP) - VPERM2I128 $0x02, Y0, Y14, Y15 - VPERM2I128 $0x13, Y0, Y14, Y14 - VPERM2I128 $0x02, Y12, Y4, Y0 - VPERM2I128 $0x13, Y12, Y4, Y12 - VPXOR (SI), Y15, Y15 - VPXOR 32(SI), Y0, Y0 - VPXOR 64(SI), Y14, Y14 - VPXOR 96(SI), Y12, Y12 - VMOVDQU Y15, (DI) - VMOVDQU Y0, 32(DI) - VMOVDQU Y14, 64(DI) - VMOVDQU Y12, 96(DI) - VPERM2I128 $0x02, Y5, Y9, Y0 - VPERM2I128 $0x02, Y13, Y1, Y14 - VPERM2I128 $0x13, Y5, Y9, Y12 - VPERM2I128 $0x13, Y13, Y1, Y4 - VPXOR 128(SI), Y0, Y0 - VPXOR 160(SI), Y14, Y14 - VPXOR 192(SI), Y12, Y12 - VPXOR 224(SI), Y4, Y4 - VMOVDQU Y0, 128(DI) - VMOVDQU Y14, 160(DI) - VMOVDQU Y12, 192(DI) - VMOVDQU Y4, 224(DI) - VPERM2I128 $0x02, Y6, Y10, Y0 - VPERM2I128 $0x02, Y8, Y2, Y14 - VPERM2I128 $0x13, Y6, Y10, Y12 - VPERM2I128 $0x13, Y8, Y2, Y4 - VPXOR 256(SI), Y0, Y0 - VPXOR 288(SI), Y14, Y14 - VPXOR 320(SI), Y12, Y12 - VPXOR 352(SI), Y4, Y4 - VMOVDQU Y0, 256(DI) - VMOVDQU Y14, 288(DI) - VMOVDQU Y12, 320(DI) - VMOVDQU Y4, 352(DI) - VPERM2I128 $0x02, Y7, Y11, Y0 - VPERM2I128 $0x02, 224(BP), Y3, Y14 - VPERM2I128 $0x13, Y7, Y11, Y12 - VPERM2I128 $0x13, 224(BP), Y3, Y4 - LEAQ 384(SI), SI - LEAQ 384(DI), DI - SUBQ $0x00000180, BX - JMP openAVX2TailLoop - -DATA ·chacha20Constants<>+0(SB)/4, $0x61707865 -DATA ·chacha20Constants<>+4(SB)/4, $0x3320646e -DATA ·chacha20Constants<>+8(SB)/4, $0x79622d32 -DATA ·chacha20Constants<>+12(SB)/4, $0x6b206574 -DATA ·chacha20Constants<>+16(SB)/4, $0x61707865 -DATA ·chacha20Constants<>+20(SB)/4, $0x3320646e -DATA ·chacha20Constants<>+24(SB)/4, $0x79622d32 -DATA ·chacha20Constants<>+28(SB)/4, $0x6b206574 -GLOBL ·chacha20Constants<>(SB), RODATA|NOPTR, $32 - -DATA ·polyClampMask<>+0(SB)/8, $0x0ffffffc0fffffff -DATA ·polyClampMask<>+8(SB)/8, $0x0ffffffc0ffffffc -DATA ·polyClampMask<>+16(SB)/8, $0xffffffffffffffff -DATA ·polyClampMask<>+24(SB)/8, $0xffffffffffffffff -GLOBL ·polyClampMask<>(SB), RODATA|NOPTR, $32 - -DATA ·sseIncMask<>+0(SB)/8, $0x0000000000000001 -DATA ·sseIncMask<>+8(SB)/8, $0x0000000000000000 -GLOBL ·sseIncMask<>(SB), RODATA|NOPTR, $16 - -DATA ·andMask<>+0(SB)/8, $0x00000000000000ff -DATA ·andMask<>+8(SB)/8, $0x0000000000000000 -DATA ·andMask<>+16(SB)/8, $0x000000000000ffff -DATA ·andMask<>+24(SB)/8, $0x0000000000000000 -DATA ·andMask<>+32(SB)/8, $0x0000000000ffffff -DATA ·andMask<>+40(SB)/8, $0x0000000000000000 -DATA ·andMask<>+48(SB)/8, $0x00000000ffffffff -DATA ·andMask<>+56(SB)/8, $0x0000000000000000 -DATA ·andMask<>+64(SB)/8, $0x000000ffffffffff -DATA ·andMask<>+72(SB)/8, $0x0000000000000000 -DATA ·andMask<>+80(SB)/8, $0x0000ffffffffffff -DATA ·andMask<>+88(SB)/8, $0x0000000000000000 -DATA ·andMask<>+96(SB)/8, $0x00ffffffffffffff -DATA ·andMask<>+104(SB)/8, $0x0000000000000000 -DATA ·andMask<>+112(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+120(SB)/8, $0x0000000000000000 -DATA ·andMask<>+128(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+136(SB)/8, $0x00000000000000ff -DATA ·andMask<>+144(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+152(SB)/8, $0x000000000000ffff -DATA ·andMask<>+160(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+168(SB)/8, $0x0000000000ffffff -DATA ·andMask<>+176(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+184(SB)/8, $0x00000000ffffffff -DATA ·andMask<>+192(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+200(SB)/8, $0x000000ffffffffff -DATA ·andMask<>+208(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+216(SB)/8, $0x0000ffffffffffff -DATA ·andMask<>+224(SB)/8, $0xffffffffffffffff -DATA ·andMask<>+232(SB)/8, $0x00ffffffffffffff -GLOBL ·andMask<>(SB), RODATA|NOPTR, $240 + VPADDD Y4, Y1, Y1 + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 -DATA ·avx2InitMask<>+0(SB)/8, $0x0000000000000000 -DATA ·avx2InitMask<>+8(SB)/8, $0x0000000000000000 -DATA ·avx2InitMask<>+16(SB)/8, $0x0000000000000001 -DATA ·avx2InitMask<>+24(SB)/8, $0x0000000000000000 -GLOBL ·avx2InitMask<>(SB), RODATA|NOPTR, $32 +openAVX2TailLoop: + CMPQ BX, $0x20 + JB openAVX2Tail + SUBQ $0x20, BX -DATA ·rol16<>+0(SB)/8, $0x0504070601000302 -DATA ·rol16<>+8(SB)/8, $0x0d0c0f0e09080b0a -DATA ·rol16<>+16(SB)/8, $0x0504070601000302 -DATA ·rol16<>+24(SB)/8, $0x0d0c0f0e09080b0a -GLOBL ·rol16<>(SB), RODATA|NOPTR, $32 + // Load for decryption + VPXOR (SI), Y0, Y0 + VMOVDQU Y0, (DI) + LEAQ 32(SI), SI + LEAQ 32(DI), DI + VMOVDQA Y14, Y0 + VMOVDQA Y12, Y14 + VMOVDQA Y4, Y12 + JMP openAVX2TailLoop -DATA ·rol8<>+0(SB)/8, $0x0605040702010003 -DATA ·rol8<>+8(SB)/8, $0x0e0d0c0f0a09080b -DATA ·rol8<>+16(SB)/8, $0x0605040702010003 -DATA ·rol8<>+24(SB)/8, $0x0e0d0c0f0a09080b -GLOBL ·rol8<>(SB), RODATA|NOPTR, $32 +openAVX2Tail: + CMPQ BX, $0x10 + VMOVDQA X0, X1 + JB openAVX2TailDone + SUBQ $0x10, BX -DATA ·avx2IncMask<>+0(SB)/8, $0x0000000000000002 -DATA ·avx2IncMask<>+8(SB)/8, $0x0000000000000000 -DATA ·avx2IncMask<>+16(SB)/8, $0x0000000000000002 -DATA ·avx2IncMask<>+24(SB)/8, $0x0000000000000000 -GLOBL ·avx2IncMask<>(SB), RODATA|NOPTR, $32 + // Load for decryption + VPXOR (SI), X0, X12 + VMOVDQU X12, (DI) + LEAQ 16(SI), SI + LEAQ 16(DI), DI + VPERM2I128 $0x11, Y0, Y0, Y0 + VMOVDQA X0, X1 -// func chacha20Poly1305Seal(dst []byte, key []uint32, src []byte, ad []byte) -// Requires: AVX, AVX2, BMI2, CMOV, SSE2 -TEXT ·chacha20Poly1305Seal(SB), $288-96 - MOVQ SP, BP - ADDQ $0x20, BP - ANDQ $-32, BP - MOVQ dst_base+0(FP), DI - MOVQ key_base+24(FP), R8 - MOVQ src_base+48(FP), SI - MOVQ src_len+56(FP), BX - MOVQ ad_base+72(FP), CX - CMPB ·useAVX2+0(SB), $0x01 - JE chacha20Poly1305Seal_AVX2 +openAVX2TailDone: + VZEROUPPER + JMP openSSETail16 - // Special optimization, for very short buffers - CMPQ BX, $0x80 - JBE sealSSE128 - - // In the seal case - prepare the poly key + 3 blocks of stream in the first iteration - MOVOU ·chacha20Constants<>+0(SB), X0 - MOVOU 16(R8), X3 - MOVOU 32(R8), X6 - MOVOU 48(R8), X9 - - // Store state on stack for future use - MOVO X3, 32(BP) - MOVO X6, 48(BP) - - // Load state, increment counter blocks - MOVO X0, X1 - MOVO X3, X4 - MOVO X6, X7 - MOVO X9, X10 - PADDL ·sseIncMask<>+0(SB), X10 - MOVO X1, X2 - MOVO X4, X5 - MOVO X7, X8 - MOVO X10, X11 - PADDL ·sseIncMask<>+0(SB), X11 - MOVO X2, X12 - MOVO X5, X13 - MOVO X8, X14 - MOVO X11, X15 - PADDL ·sseIncMask<>+0(SB), X15 - - // Store counters - MOVO X9, 80(BP) - MOVO X10, 96(BP) - MOVO X11, 112(BP) - MOVO X15, 128(BP) - MOVQ $0x0000000a, R9 - -sealSSEIntroLoop: - MOVO X14, 64(BP) - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X3 - PXOR X14, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X3 - PXOR X14, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X4 - PXOR X14, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X4 - PXOR X14, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X5 - PXOR X14, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X5 - PXOR X14, X5 - MOVO 64(BP), X14 - MOVO X7, 64(BP) - PADDD X13, X12 - PXOR X12, X15 - ROL16(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x0c, X7 - PSRLL $0x14, X13 - PXOR X7, X13 - PADDD X13, X12 - PXOR X12, X15 - ROL8(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x07, X7 - PSRLL $0x19, X13 - PXOR X7, X13 - MOVO 64(BP), X7 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x0c - MOVO X14, 64(BP) - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X3 - PXOR X14, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X3 - PXOR X14, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X4 - PXOR X14, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X4 - PXOR X14, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X5 - PXOR X14, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X5 - PXOR X14, X5 - MOVO 64(BP), X14 - MOVO X7, 64(BP) - PADDD X13, X12 - PXOR X12, X15 - ROL16(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x0c, X7 - PSRLL $0x14, X13 - PXOR X7, X13 - PADDD X13, X12 - PXOR X12, X15 - ROL8(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x07, X7 - PSRLL $0x19, X13 - PXOR X7, X13 - MOVO 64(BP), X7 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x04 - DECQ R9 - JNE sealSSEIntroLoop - - // Add in the state - PADDD ·chacha20Constants<>+0(SB), X0 - PADDD ·chacha20Constants<>+0(SB), X1 - PADDD ·chacha20Constants<>+0(SB), X2 - PADDD ·chacha20Constants<>+0(SB), X12 - PADDD 32(BP), X3 - PADDD 32(BP), X4 - PADDD 32(BP), X5 - PADDD 32(BP), X13 - PADDD 48(BP), X7 - PADDD 48(BP), X8 - PADDD 48(BP), X14 - PADDD 96(BP), X10 - PADDD 112(BP), X11 - PADDD 128(BP), X15 - - // Clamp and store the key - PAND ·polyClampMask<>+0(SB), X0 - MOVO X0, (BP) - MOVO X3, 16(BP) - - // Hash AAD - MOVQ ad_len+80(FP), R9 - CALL polyHashADInternal<>(SB) - MOVOU (SI), X0 - MOVOU 16(SI), X3 - MOVOU 32(SI), X6 - MOVOU 48(SI), X9 - PXOR X0, X1 - PXOR X3, X4 - PXOR X6, X7 - PXOR X9, X10 - MOVOU X1, (DI) - MOVOU X4, 16(DI) - MOVOU X7, 32(DI) - MOVOU X10, 48(DI) - MOVOU 64(SI), X0 - MOVOU 80(SI), X3 - MOVOU 96(SI), X6 - MOVOU 112(SI), X9 - PXOR X0, X2 - PXOR X3, X5 - PXOR X6, X8 - PXOR X9, X11 - MOVOU X2, 64(DI) - MOVOU X5, 80(DI) - MOVOU X8, 96(DI) - MOVOU X11, 112(DI) - MOVQ $0x00000080, CX - SUBQ $0x80, BX - LEAQ 128(SI), SI - MOVO X12, X1 - MOVO X13, X4 - MOVO X14, X7 - MOVO X15, X10 - CMPQ BX, $0x40 - JBE sealSSE128SealHash - MOVOU (SI), X0 - MOVOU 16(SI), X3 - MOVOU 32(SI), X6 - MOVOU 48(SI), X9 - PXOR X0, X12 - PXOR X3, X13 - PXOR X6, X14 - PXOR X9, X15 - MOVOU X12, 128(DI) - MOVOU X13, 144(DI) - MOVOU X14, 160(DI) - MOVOU X15, 176(DI) - ADDQ $0x40, CX - SUBQ $0x40, BX - LEAQ 64(SI), SI - MOVQ $0x00000002, CX - MOVQ $0x00000008, R9 - CMPQ BX, $0x40 - JBE sealSSETail64 - CMPQ BX, $0x80 - JBE sealSSETail128 - CMPQ BX, $0xc0 - JBE sealSSETail192 - -sealSSEMainLoop: - // Load state, increment counter blocks - MOVO ·chacha20Constants<>+0(SB), X0 - MOVO 32(BP), X3 - MOVO 48(BP), X6 - MOVO 128(BP), X9 - PADDL ·sseIncMask<>+0(SB), X9 - MOVO X0, X1 - MOVO X3, X4 - MOVO X6, X7 - MOVO X9, X10 - PADDL ·sseIncMask<>+0(SB), X10 - MOVO X1, X2 - MOVO X4, X5 - MOVO X7, X8 - MOVO X10, X11 - PADDL ·sseIncMask<>+0(SB), X11 - MOVO X2, X12 - MOVO X5, X13 - MOVO X8, X14 - MOVO X11, X15 - PADDL ·sseIncMask<>+0(SB), X15 - - // Store counters - MOVO X9, 80(BP) - MOVO X10, 96(BP) - MOVO X11, 112(BP) - MOVO X15, 128(BP) - -sealSSEInnerLoop: - MOVO X14, 64(BP) - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X3 - PXOR X14, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X3 - PXOR X14, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X4 - PXOR X14, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X4 - PXOR X14, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X5 - PXOR X14, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X5 - PXOR X14, X5 - MOVO 64(BP), X14 - MOVO X7, 64(BP) - PADDD X13, X12 - PXOR X12, X15 - ROL16(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x0c, X7 - PSRLL $0x14, X13 - PXOR X7, X13 - PADDD X13, X12 - PXOR X12, X15 - ROL8(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x07, X7 - PSRLL $0x19, X13 - PXOR X7, X13 - MOVO 64(BP), X7 - ADDQ (DI), R10 - ADCQ 8(DI), R11 +openAVX2Tail256: + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VMOVDQA Y4, Y7 + VMOVDQA Y1, Y11 + + // Compute the number of iterations that will hash data + MOVQ BX, 224(BP) + MOVQ BX, CX + SUBQ $0x80, CX + SHRQ $0x04, CX + MOVQ $0x0000000a, R9 + CMPQ CX, $0x0a + CMOVQGT R9, CX + MOVQ SI, BX + XORQ R9, R9 + +openAVX2Tail256LoopA: + ADDQ (BX), R10 + ADCQ 8(BX), R11 ADCQ $0x01, R12 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x0c - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 IMULQ R12, R15 + MULXQ R11, AX, DX ADDQ AX, R14 ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX ADDQ AX, R15 - ADCQ $0x00, DX - LEAQ 16(DI), DI - MOVO X14, 64(BP) - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X3 - PXOR X14, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X14) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X3 - PXOR X14, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X4 - PXOR X14, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X14) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X4 - PXOR X14, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x0c, X14 - PSRLL $0x14, X5 - PXOR X14, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X14) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X14 - PSLLL $0x07, X14 - PSRLL $0x19, X5 - PXOR X14, X5 - MOVO 64(BP), X14 - MOVO X7, 64(BP) - IMULQ R12, R8 - ADDQ R10, R15 ADCQ DX, R8 - PADDD X13, X12 - PXOR X12, X15 - ROL16(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x0c, X7 - PSRLL $0x14, X13 - PXOR X7, X13 - PADDD X13, X12 - PXOR X12, X15 - ROL8(X15, X7) - PADDD X15, X14 - PXOR X14, X13 - MOVO X13, X7 - PSLLL $0x07, X7 - PSRLL $0x19, X13 - PXOR X7, X13 - MOVO 64(BP), X7 MOVQ R13, R10 MOVQ R14, R11 MOVQ R15, R12 @@ -5555,112 +1518,117 @@ sealSSEInnerLoop: ADDQ R15, R10 ADCQ R8, R11 ADCQ $0x00, R12 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x04 - DECQ R9 - JGE sealSSEInnerLoop - ADDQ (DI), R10 - ADCQ 8(DI), R11 + LEAQ 16(BX), BX + +openAVX2Tail256LoopB: + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + INCQ R9 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + CMPQ R9, CX + JB openAVX2Tail256LoopA + CMPQ R9, $0x0a + JNE openAVX2Tail256LoopB + MOVQ BX, R9 + SUBQ SI, BX + MOVQ BX, CX + MOVQ 224(BP), BX + +openAVX2Tail256Hash: + ADDQ $0x10, CX + CMPQ CX, BX + JGT openAVX2Tail256HashEnd + ADDQ (R9), R10 + ADCQ 8(R9), R11 ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 IMULQ R12, R15 + MULXQ R11, AX, DX ADDQ AX, R14 ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 ADCQ DX, R8 MOVQ R13, R10 MOVQ R14, R11 @@ -5672,143 +1640,94 @@ sealSSEInnerLoop: SHRQ $0x02, R8, R15 SHRQ $0x02, R8 ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - LEAQ 16(DI), DI - DECQ CX - JG sealSSEInnerLoop - - // Add in the state - PADDD ·chacha20Constants<>+0(SB), X0 - PADDD ·chacha20Constants<>+0(SB), X1 - PADDD ·chacha20Constants<>+0(SB), X2 - PADDD ·chacha20Constants<>+0(SB), X12 - PADDD 32(BP), X3 - PADDD 32(BP), X4 - PADDD 32(BP), X5 - PADDD 32(BP), X13 - PADDD 48(BP), X6 - PADDD 48(BP), X7 - PADDD 48(BP), X8 - PADDD 48(BP), X14 - PADDD 80(BP), X9 - PADDD 96(BP), X10 - PADDD 112(BP), X11 - PADDD 128(BP), X15 - MOVO X15, 64(BP) - - // Load - xor - store - MOVOU (SI), X15 - PXOR X15, X0 - MOVOU 16(SI), X15 - PXOR X15, X3 - MOVOU 32(SI), X15 - PXOR X15, X6 - MOVOU 48(SI), X15 - PXOR X15, X9 - MOVOU X0, (DI) - MOVOU X3, 16(DI) - MOVOU X6, 32(DI) - MOVOU X9, 48(DI) - MOVO 64(BP), X15 - MOVOU 64(SI), X0 - MOVOU 80(SI), X3 - MOVOU 96(SI), X6 - MOVOU 112(SI), X9 - PXOR X0, X1 - PXOR X3, X4 - PXOR X6, X7 - PXOR X9, X10 - MOVOU X1, 64(DI) - MOVOU X4, 80(DI) - MOVOU X7, 96(DI) - MOVOU X10, 112(DI) - MOVOU 128(SI), X0 - MOVOU 144(SI), X3 - MOVOU 160(SI), X6 - MOVOU 176(SI), X9 - PXOR X0, X2 - PXOR X3, X5 - PXOR X6, X8 - PXOR X9, X11 - MOVOU X2, 128(DI) - MOVOU X5, 144(DI) - MOVOU X8, 160(DI) - MOVOU X11, 176(DI) - ADDQ $0xc0, SI - MOVQ $0x000000c0, CX - SUBQ $0xc0, BX - MOVO X12, X1 - MOVO X13, X4 - MOVO X14, X7 - MOVO X15, X10 - CMPQ BX, $0x40 - JBE sealSSE128SealHash - MOVOU (SI), X0 - MOVOU 16(SI), X3 - MOVOU 32(SI), X6 - MOVOU 48(SI), X9 - PXOR X0, X12 - PXOR X3, X13 - PXOR X6, X14 - PXOR X9, X15 - MOVOU X12, 192(DI) - MOVOU X13, 208(DI) - MOVOU X14, 224(DI) - MOVOU X15, 240(DI) - LEAQ 64(SI), SI - SUBQ $0x40, BX - MOVQ $0x00000006, CX - MOVQ $0x00000004, R9 - CMPQ BX, $0xc0 - JG sealSSEMainLoop - MOVQ BX, CX - TESTQ BX, BX - JE sealSSE128SealHash - MOVQ $0x00000006, CX - CMPQ BX, $0x40 - JBE sealSSETail64 - CMPQ BX, $0x80 - JBE sealSSETail128 - JMP sealSSETail192 - -sealSSETail64: - MOVO ·chacha20Constants<>+0(SB), X1 - MOVO 32(BP), X4 - MOVO 48(BP), X7 - MOVO 128(BP), X10 - PADDL ·sseIncMask<>+0(SB), X10 - MOVO X10, 80(BP) - -sealSSETail64LoopA: - ADDQ (DI), R10 - ADCQ 8(DI), R11 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(R9), R9 + JMP openAVX2Tail256Hash + +openAVX2Tail256HashEnd: + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD Y7, Y4, Y4 + VPADDD Y11, Y1, Y1 + VPERM2I128 $0x02, Y0, Y14, Y6 + VPERM2I128 $0x02, Y12, Y4, Y10 + VPERM2I128 $0x13, Y0, Y14, Y8 + VPERM2I128 $0x13, Y12, Y4, Y2 + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR (SI), Y6, Y6 + VPXOR 32(SI), Y10, Y10 + VPXOR 64(SI), Y8, Y8 + VPXOR 96(SI), Y2, Y2 + VMOVDQU Y6, (DI) + VMOVDQU Y10, 32(DI) + VMOVDQU Y8, 64(DI) + VMOVDQU Y2, 96(DI) + LEAQ 128(SI), SI + LEAQ 128(DI), DI + SUBQ $0x80, BX + JMP openAVX2TailLoop + +openAVX2Tail384: + // Need to decrypt up to 384 bytes - prepare six blocks + VMOVDQA ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + + // Compute the number of iterations that will hash two blocks of data + MOVQ BX, 224(BP) + MOVQ BX, CX + SUBQ $0x00000100, CX + SHRQ $0x04, CX + ADDQ $0x06, CX + MOVQ $0x0000000a, R9 + CMPQ CX, $0x0a + CMOVQGT R9, CX + MOVQ SI, BX + XORQ R9, R9 + +openAVX2Tail384LoopB: + ADDQ (BX), R10 + ADCQ 8(BX), R11 ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 IMULQ R12, R15 + MULXQ R11, AX, DX ADDQ AX, R14 ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 ADCQ DX, R8 MOVQ R13, R10 MOVQ R14, R11 @@ -5825,175 +1744,190 @@ sealSSETail64LoopA: ADDQ R15, R10 ADCQ R8, R11 ADCQ $0x00, R12 - LEAQ 16(DI), DI + LEAQ 16(BX), BX + +openAVX2Tail384LoopA: + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + ADDQ (BX), R10 + ADCQ 8(BX), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 16(BX), BX + INCQ R9 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x0c, Y14, Y3 + VPSRLD $0x14, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y14, Y0, Y0 + VPXOR Y0, Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPADDD Y4, Y12, Y12 + VPXOR Y12, Y14, Y14 + VPSLLD $0x07, Y14, Y3 + VPSRLD $0x19, Y14, Y14 + VPXOR Y3, Y14, Y14 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x0c, Y9, Y3 + VPSRLD $0x14, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y9, Y5, Y5 + VPXOR Y5, Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPADDD Y1, Y13, Y13 + VPXOR Y13, Y9, Y9 + VPSLLD $0x07, Y9, Y3 + VPSRLD $0x19, Y9, Y9 + VPXOR Y3, Y9, Y9 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x0c, Y10, Y3 + VPSRLD $0x14, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPADDD Y10, Y6, Y6 + VPXOR Y6, Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPADDD Y2, Y8, Y8 + VPXOR Y8, Y10, Y10 + VPSLLD $0x07, Y10, Y3 + VPSRLD $0x19, Y10, Y10 + VPXOR Y3, Y10, Y10 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + CMPQ R9, CX + JB openAVX2Tail384LoopB + CMPQ R9, $0x0a + JNE openAVX2Tail384LoopA + MOVQ BX, R9 + SUBQ SI, BX + MOVQ BX, CX + MOVQ 224(BP), BX -sealSSETail64LoopB: - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X13) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X13 - PSLLL $0x0c, X13 - PSRLL $0x14, X4 - PXOR X13, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X13) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X13 - PSLLL $0x07, X13 - PSRLL $0x19, X4 - PXOR X13, X4 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x0c - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X13) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X13 - PSLLL $0x0c, X13 - PSRLL $0x14, X4 - PXOR X13, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X13) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X13 - PSLLL $0x07, X13 - PSRLL $0x19, X4 - PXOR X13, X4 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x04 - ADDQ (DI), R10 - ADCQ 8(DI), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - LEAQ 16(DI), DI - DECQ CX - JG sealSSETail64LoopA - DECQ R9 - JGE sealSSETail64LoopB - PADDL ·chacha20Constants<>+0(SB), X1 - PADDL 32(BP), X4 - PADDL 48(BP), X7 - PADDL 80(BP), X10 - JMP sealSSE128Seal - -sealSSETail128: - MOVO ·chacha20Constants<>+0(SB), X0 - MOVO 32(BP), X3 - MOVO 48(BP), X6 - MOVO 128(BP), X9 - PADDL ·sseIncMask<>+0(SB), X9 - MOVO X9, 80(BP) - MOVO X0, X1 - MOVO X3, X4 - MOVO X6, X7 - MOVO X9, X10 - PADDL ·sseIncMask<>+0(SB), X10 - MOVO X10, 96(BP) - -sealSSETail128LoopA: - ADDQ (DI), R10 - ADCQ 8(DI), R11 +openAVX2Tail384Hash: + ADDQ $0x10, CX + CMPQ CX, BX + JGT openAVX2Tail384HashEnd + ADDQ (R9), R10 + ADCQ 8(R9), R11 ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 IMULQ R12, R15 + MULXQ R11, AX, DX ADDQ AX, R14 ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 ADCQ DX, R8 MOVQ R13, R10 MOVQ R14, R11 @@ -6010,430 +1944,99 @@ sealSSETail128LoopA: ADDQ R15, R10 ADCQ R8, R11 ADCQ $0x00, R12 - LEAQ 16(DI), DI + LEAQ 16(R9), R9 + JMP openAVX2Tail384Hash -sealSSETail128LoopB: - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x0c - ADDQ (DI), R10 - ADCQ 8(DI), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - LEAQ 16(DI), DI - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x04 - DECQ CX - JG sealSSETail128LoopA - DECQ R9 - JGE sealSSETail128LoopB - PADDL ·chacha20Constants<>+0(SB), X0 - PADDL ·chacha20Constants<>+0(SB), X1 - PADDL 32(BP), X3 - PADDL 32(BP), X4 - PADDL 48(BP), X6 - PADDL 48(BP), X7 - PADDL 80(BP), X9 - PADDL 96(BP), X10 - MOVOU (SI), X12 - MOVOU 16(SI), X13 - MOVOU 32(SI), X14 - MOVOU 48(SI), X15 - PXOR X12, X0 - PXOR X13, X3 - PXOR X14, X6 - PXOR X15, X9 - MOVOU X0, (DI) - MOVOU X3, 16(DI) - MOVOU X6, 32(DI) - MOVOU X9, 48(DI) - MOVQ $0x00000040, CX - LEAQ 64(SI), SI - SUBQ $0x40, BX - JMP sealSSE128SealHash - -sealSSETail192: - MOVO ·chacha20Constants<>+0(SB), X0 - MOVO 32(BP), X3 - MOVO 48(BP), X6 - MOVO 128(BP), X9 - PADDL ·sseIncMask<>+0(SB), X9 - MOVO X9, 80(BP) - MOVO X0, X1 - MOVO X3, X4 - MOVO X6, X7 - MOVO X9, X10 - PADDL ·sseIncMask<>+0(SB), X10 - MOVO X10, 96(BP) - MOVO X1, X2 - MOVO X4, X5 - MOVO X7, X8 - MOVO X10, X11 - PADDL ·sseIncMask<>+0(SB), X11 - MOVO X11, 112(BP) - -sealSSETail192LoopA: - ADDQ (DI), R10 - ADCQ 8(DI), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - LEAQ 16(DI), DI +openAVX2Tail384HashEnd: + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPERM2I128 $0x02, Y0, Y14, Y3 + VPERM2I128 $0x02, Y12, Y4, Y7 + VPERM2I128 $0x13, Y0, Y14, Y11 + VPERM2I128 $0x13, Y12, Y4, Y15 + VPXOR (SI), Y3, Y3 + VPXOR 32(SI), Y7, Y7 + VPXOR 64(SI), Y11, Y11 + VPXOR 96(SI), Y15, Y15 + VMOVDQU Y3, (DI) + VMOVDQU Y7, 32(DI) + VMOVDQU Y11, 64(DI) + VMOVDQU Y15, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y3 + VPERM2I128 $0x02, Y13, Y1, Y7 + VPERM2I128 $0x13, Y5, Y9, Y11 + VPERM2I128 $0x13, Y13, Y1, Y15 + VPXOR 128(SI), Y3, Y3 + VPXOR 160(SI), Y7, Y7 + VPXOR 192(SI), Y11, Y11 + VPXOR 224(SI), Y15, Y15 + VMOVDQU Y3, 128(DI) + VMOVDQU Y7, 160(DI) + VMOVDQU Y11, 192(DI) + VMOVDQU Y15, 224(DI) + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + LEAQ 256(SI), SI + LEAQ 256(DI), DI + SUBQ $0x00000100, BX + JMP openAVX2TailLoop -sealSSETail192LoopB: - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X5 - PXOR X12, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X5 - PXOR X12, X5 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - ADDQ (DI), R10 - ADCQ 8(DI), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 +openAVX2Tail512: + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + VMOVDQA Y0, Y5 + VMOVDQA Y0, Y6 + VMOVDQA Y0, Y7 + VMOVDQA 32(BP), Y14 + VMOVDQA Y14, Y9 + VMOVDQA Y14, Y10 + VMOVDQA Y14, Y11 + VMOVDQA 64(BP), Y12 + VMOVDQA Y12, Y13 + VMOVDQA Y12, Y8 + VMOVDQA Y12, Y15 + VMOVDQA 192(BP), Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y4 + VPADDD ·avx2IncMask<>+0(SB), Y4, Y1 + VPADDD ·avx2IncMask<>+0(SB), Y1, Y2 + VPADDD ·avx2IncMask<>+0(SB), Y2, Y3 + VMOVDQA Y4, 96(BP) + VMOVDQA Y1, 128(BP) + VMOVDQA Y2, 160(BP) + VMOVDQA Y3, 192(BP) + XORQ CX, CX + MOVQ SI, R9 + +openAVX2Tail512LoopB: + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 IMULQ R12, R15 + MULXQ R11, AX, DX ADDQ AX, R14 ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 ADCQ DX, R8 MOVQ R13, R10 MOVQ R14, R11 @@ -6450,465 +2053,268 @@ sealSSETail192LoopB: ADDQ R15, R10 ADCQ R8, R11 ADCQ $0x00, R12 - LEAQ 16(DI), DI - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X5 - PXOR X12, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X5 - PXOR X12, X5 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - DECQ CX - JG sealSSETail192LoopA - DECQ R9 - JGE sealSSETail192LoopB - PADDL ·chacha20Constants<>+0(SB), X0 - PADDL ·chacha20Constants<>+0(SB), X1 - PADDL ·chacha20Constants<>+0(SB), X2 - PADDL 32(BP), X3 - PADDL 32(BP), X4 - PADDL 32(BP), X5 - PADDL 48(BP), X6 - PADDL 48(BP), X7 - PADDL 48(BP), X8 - PADDL 80(BP), X9 - PADDL 96(BP), X10 - PADDL 112(BP), X11 - MOVOU (SI), X12 - MOVOU 16(SI), X13 - MOVOU 32(SI), X14 - MOVOU 48(SI), X15 - PXOR X12, X0 - PXOR X13, X3 - PXOR X14, X6 - PXOR X15, X9 - MOVOU X0, (DI) - MOVOU X3, 16(DI) - MOVOU X6, 32(DI) - MOVOU X9, 48(DI) - MOVOU 64(SI), X12 - MOVOU 80(SI), X13 - MOVOU 96(SI), X14 - MOVOU 112(SI), X15 - PXOR X12, X1 - PXOR X13, X4 - PXOR X14, X7 - PXOR X15, X10 - MOVOU X1, 64(DI) - MOVOU X4, 80(DI) - MOVOU X7, 96(DI) - MOVOU X10, 112(DI) - MOVO X2, X1 - MOVO X5, X4 - MOVO X8, X7 - MOVO X11, X10 - MOVQ $0x00000080, CX - LEAQ 128(SI), SI - SUBQ $0x80, BX - JMP sealSSE128SealHash - -sealSSE128: - MOVOU ·chacha20Constants<>+0(SB), X0 - MOVOU 16(R8), X3 - MOVOU 32(R8), X6 - MOVOU 48(R8), X9 - MOVO X0, X1 - MOVO X3, X4 - MOVO X6, X7 - MOVO X9, X10 - PADDL ·sseIncMask<>+0(SB), X10 - MOVO X1, X2 - MOVO X4, X5 - MOVO X7, X8 - MOVO X10, X11 - PADDL ·sseIncMask<>+0(SB), X11 - MOVO X3, X13 - MOVO X6, X14 - MOVO X10, X15 - MOVQ $0x0000000a, R9 - -sealSSE128InnerCipherLoop: - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X5 - PXOR X12, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X5 - PXOR X12, X5 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x04 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x0c - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - PADDD X3, X0 - PXOR X0, X9 - ROL16(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X3 - PXOR X12, X3 - PADDD X3, X0 - PXOR X0, X9 - ROL8(X9, X12) - PADDD X9, X6 - PXOR X6, X3 - MOVO X3, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X3 - PXOR X12, X3 - PADDD X4, X1 - PXOR X1, X10 - ROL16(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X4 - PXOR X12, X4 - PADDD X4, X1 - PXOR X1, X10 - ROL8(X10, X12) - PADDD X10, X7 - PXOR X7, X4 - MOVO X4, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X4 - PXOR X12, X4 - PADDD X5, X2 - PXOR X2, X11 - ROL16(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x0c, X12 - PSRLL $0x14, X5 - PXOR X12, X5 - PADDD X5, X2 - PXOR X2, X11 - ROL8(X11, X12) - PADDD X11, X8 - PXOR X8, X5 - MOVO X5, X12 - PSLLL $0x07, X12 - PSRLL $0x19, X5 - PXOR X12, X5 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xe4 - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xed - BYTE $0x0c - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xf6 - BYTE $0x08 - BYTE $0x66 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xff - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc0 - BYTE $0x08 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xc9 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xd2 - BYTE $0x04 - BYTE $0x66 - BYTE $0x45 - BYTE $0x0f - BYTE $0x3a - BYTE $0x0f - BYTE $0xdb - BYTE $0x04 - DECQ R9 - JNE sealSSE128InnerCipherLoop - - // A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded - PADDL ·chacha20Constants<>+0(SB), X0 - PADDL ·chacha20Constants<>+0(SB), X1 - PADDL ·chacha20Constants<>+0(SB), X2 - PADDL X13, X3 - PADDL X13, X4 - PADDL X13, X5 - PADDL X14, X7 - PADDL X14, X8 - PADDL X15, X10 - PADDL ·sseIncMask<>+0(SB), X15 - PADDL X15, X11 - PAND ·polyClampMask<>+0(SB), X0 - MOVOU X0, (BP) - MOVOU X3, 16(BP) + LEAQ 16(R9), R9 - // Hash - MOVQ ad_len+80(FP), R9 - CALL polyHashADInternal<>(SB) - XORQ CX, CX +openAVX2Tail512LoopA: + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + ADDQ (R9), R10 + ADCQ 8(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPALIGNR $0x04, Y14, Y14, Y14 + VPALIGNR $0x04, Y9, Y9, Y9 + VPALIGNR $0x04, Y10, Y10, Y10 + VPALIGNR $0x04, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x0c, Y4, Y4, Y4 + VPALIGNR $0x0c, Y1, Y1, Y1 + VPALIGNR $0x0c, Y2, Y2, Y2 + VPALIGNR $0x0c, Y3, Y3, Y3 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol16<>+0(SB), Y4, Y4 + VPSHUFB ·rol16<>+0(SB), Y1, Y1 + VPSHUFB ·rol16<>+0(SB), Y2, Y2 + VPSHUFB ·rol16<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + ADDQ 16(R9), R10 + ADCQ 24(R9), R11 + ADCQ $0x01, R12 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 + IMULQ R12, R15 + MULXQ R11, AX, DX + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX + ADDQ AX, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + LEAQ 32(R9), R9 + VMOVDQA Y15, 224(BP) + VPSLLD $0x0c, Y14, Y15 + VPSRLD $0x14, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x0c, Y9, Y15 + VPSRLD $0x14, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x0c, Y10, Y15 + VPSRLD $0x14, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x0c, Y11, Y15 + VPSRLD $0x14, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPADDD Y14, Y0, Y0 + VPADDD Y9, Y5, Y5 + VPADDD Y10, Y6, Y6 + VPADDD Y11, Y7, Y7 + VPXOR Y0, Y4, Y4 + VPXOR Y5, Y1, Y1 + VPXOR Y6, Y2, Y2 + VPXOR Y7, Y3, Y3 + VPSHUFB ·rol8<>+0(SB), Y4, Y4 + VPSHUFB ·rol8<>+0(SB), Y1, Y1 + VPSHUFB ·rol8<>+0(SB), Y2, Y2 + VPSHUFB ·rol8<>+0(SB), Y3, Y3 + VPADDD Y4, Y12, Y12 + VPADDD Y1, Y13, Y13 + VPADDD Y2, Y8, Y8 + VPADDD Y3, Y15, Y15 + VPXOR Y12, Y14, Y14 + VPXOR Y13, Y9, Y9 + VPXOR Y8, Y10, Y10 + VPXOR Y15, Y11, Y11 + VMOVDQA Y15, 224(BP) + VPSLLD $0x07, Y14, Y15 + VPSRLD $0x19, Y14, Y14 + VPXOR Y15, Y14, Y14 + VPSLLD $0x07, Y9, Y15 + VPSRLD $0x19, Y9, Y9 + VPXOR Y15, Y9, Y9 + VPSLLD $0x07, Y10, Y15 + VPSRLD $0x19, Y10, Y10 + VPXOR Y15, Y10, Y10 + VPSLLD $0x07, Y11, Y15 + VPSRLD $0x19, Y11, Y11 + VPXOR Y15, Y11, Y11 + VMOVDQA 224(BP), Y15 + VPALIGNR $0x0c, Y14, Y14, Y14 + VPALIGNR $0x0c, Y9, Y9, Y9 + VPALIGNR $0x0c, Y10, Y10, Y10 + VPALIGNR $0x0c, Y11, Y11, Y11 + VPALIGNR $0x08, Y12, Y12, Y12 + VPALIGNR $0x08, Y13, Y13, Y13 + VPALIGNR $0x08, Y8, Y8, Y8 + VPALIGNR $0x08, Y15, Y15, Y15 + VPALIGNR $0x04, Y4, Y4, Y4 + VPALIGNR $0x04, Y1, Y1, Y1 + VPALIGNR $0x04, Y2, Y2, Y2 + VPALIGNR $0x04, Y3, Y3, Y3 + INCQ CX + CMPQ CX, $0x04 + JLT openAVX2Tail512LoopB + CMPQ CX, $0x0a + JNE openAVX2Tail512LoopA + MOVQ BX, CX + SUBQ $0x00000180, CX + ANDQ $-16, CX -sealSSE128SealHash: - CMPQ CX, $0x10 - JB sealSSE128Seal - ADDQ (DI), R10 - ADCQ 8(DI), R11 +openAVX2Tail512HashLoop: + TESTQ CX, CX + JE openAVX2Tail512HashEnd + ADDQ (R9), R10 + ADCQ 8(R9), R11 ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 + MOVQ (BP), DX + MOVQ DX, R15 + MULXQ R10, R13, R14 IMULQ R12, R15 + MULXQ R11, AX, DX ADDQ AX, R14 ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 + MOVQ 8(BP), DX + MULXQ R10, R10, AX + ADDQ R10, R14 + MULXQ R11, R11, R8 + ADCQ R11, R15 + ADCQ $0x00, R8 + IMULQ R12, DX ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 ADCQ DX, R8 MOVQ R13, R10 MOVQ R14, R11 @@ -6925,238 +2331,162 @@ sealSSE128SealHash: ADDQ R15, R10 ADCQ R8, R11 ADCQ $0x00, R12 + LEAQ 16(R9), R9 SUBQ $0x10, CX - ADDQ $0x10, DI - JMP sealSSE128SealHash - -sealSSE128Seal: - CMPQ BX, $0x10 - JB sealSSETail - SUBQ $0x10, BX - - // Load for decryption - MOVOU (SI), X12 - PXOR X12, X1 - MOVOU X1, (DI) - LEAQ 16(SI), SI - LEAQ 16(DI), DI - - // Extract for hashing - MOVQ X1, R13 - PSRLDQ $0x08, X1 - MOVQ X1, R14 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - - // Shift the stream "left" - MOVO X4, X1 - MOVO X7, X4 - MOVO X10, X7 - MOVO X2, X10 - MOVO X5, X2 - MOVO X8, X5 - MOVO X11, X8 - JMP sealSSE128Seal + JMP openAVX2Tail512HashLoop -sealSSETail: - TESTQ BX, BX - JE sealSSEFinalize +openAVX2Tail512HashEnd: + VPADDD ·chacha20Constants<>+0(SB), Y0, Y0 + VPADDD ·chacha20Constants<>+0(SB), Y5, Y5 + VPADDD ·chacha20Constants<>+0(SB), Y6, Y6 + VPADDD ·chacha20Constants<>+0(SB), Y7, Y7 + VPADDD 32(BP), Y14, Y14 + VPADDD 32(BP), Y9, Y9 + VPADDD 32(BP), Y10, Y10 + VPADDD 32(BP), Y11, Y11 + VPADDD 64(BP), Y12, Y12 + VPADDD 64(BP), Y13, Y13 + VPADDD 64(BP), Y8, Y8 + VPADDD 64(BP), Y15, Y15 + VPADDD 96(BP), Y4, Y4 + VPADDD 128(BP), Y1, Y1 + VPADDD 160(BP), Y2, Y2 + VPADDD 192(BP), Y3, Y3 + VMOVDQA Y15, 224(BP) + VPERM2I128 $0x02, Y0, Y14, Y15 + VPERM2I128 $0x13, Y0, Y14, Y14 + VPERM2I128 $0x02, Y12, Y4, Y0 + VPERM2I128 $0x13, Y12, Y4, Y12 + VPXOR (SI), Y15, Y15 + VPXOR 32(SI), Y0, Y0 + VPXOR 64(SI), Y14, Y14 + VPXOR 96(SI), Y12, Y12 + VMOVDQU Y15, (DI) + VMOVDQU Y0, 32(DI) + VMOVDQU Y14, 64(DI) + VMOVDQU Y12, 96(DI) + VPERM2I128 $0x02, Y5, Y9, Y0 + VPERM2I128 $0x02, Y13, Y1, Y14 + VPERM2I128 $0x13, Y5, Y9, Y12 + VPERM2I128 $0x13, Y13, Y1, Y4 + VPXOR 128(SI), Y0, Y0 + VPXOR 160(SI), Y14, Y14 + VPXOR 192(SI), Y12, Y12 + VPXOR 224(SI), Y4, Y4 + VMOVDQU Y0, 128(DI) + VMOVDQU Y14, 160(DI) + VMOVDQU Y12, 192(DI) + VMOVDQU Y4, 224(DI) + VPERM2I128 $0x02, Y6, Y10, Y0 + VPERM2I128 $0x02, Y8, Y2, Y14 + VPERM2I128 $0x13, Y6, Y10, Y12 + VPERM2I128 $0x13, Y8, Y2, Y4 + VPXOR 256(SI), Y0, Y0 + VPXOR 288(SI), Y14, Y14 + VPXOR 320(SI), Y12, Y12 + VPXOR 352(SI), Y4, Y4 + VMOVDQU Y0, 256(DI) + VMOVDQU Y14, 288(DI) + VMOVDQU Y12, 320(DI) + VMOVDQU Y4, 352(DI) + VPERM2I128 $0x02, Y7, Y11, Y0 + VPERM2I128 $0x02, 224(BP), Y3, Y14 + VPERM2I128 $0x13, Y7, Y11, Y12 + VPERM2I128 $0x13, 224(BP), Y3, Y4 + LEAQ 384(SI), SI + LEAQ 384(DI), DI + SUBQ $0x00000180, BX + JMP openAVX2TailLoop - // We can only load the PT one byte at a time to avoid read after end of buffer - MOVQ BX, R9 - SHLQ $0x04, R9 - LEAQ ·andMask<>+0(SB), R13 - MOVQ BX, CX - LEAQ -1(SI)(BX*1), SI - XORQ R15, R15 - XORQ R8, R8 - XORQ AX, AX +DATA ·chacha20Constants<>+0(SB)/4, $0x61707865 +DATA ·chacha20Constants<>+4(SB)/4, $0x3320646e +DATA ·chacha20Constants<>+8(SB)/4, $0x79622d32 +DATA ·chacha20Constants<>+12(SB)/4, $0x6b206574 +DATA ·chacha20Constants<>+16(SB)/4, $0x61707865 +DATA ·chacha20Constants<>+20(SB)/4, $0x3320646e +DATA ·chacha20Constants<>+24(SB)/4, $0x79622d32 +DATA ·chacha20Constants<>+28(SB)/4, $0x6b206574 +GLOBL ·chacha20Constants<>(SB), RODATA|NOPTR, $32 -sealSSETailLoadLoop: - SHLQ $0x08, R15, R8 - SHLQ $0x08, R15 - MOVB (SI), AX - XORQ AX, R15 - LEAQ -1(SI), SI - DECQ CX - JNE sealSSETailLoadLoop - MOVQ R15, 64(BP) - MOVQ R8, 72(BP) - PXOR 64(BP), X1 - MOVOU X1, (DI) - MOVOU -16(R13)(R9*1), X12 - PAND X12, X1 - MOVQ X1, R13 - PSRLDQ $0x08, X1 - MOVQ X1, R14 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 - ADDQ BX, DI +DATA ·avx2InitMask<>+0(SB)/8, $0x0000000000000000 +DATA ·avx2InitMask<>+8(SB)/8, $0x0000000000000000 +DATA ·avx2InitMask<>+16(SB)/8, $0x0000000000000001 +DATA ·avx2InitMask<>+24(SB)/8, $0x0000000000000000 +GLOBL ·avx2InitMask<>(SB), RODATA|NOPTR, $32 -sealSSEFinalize: - // Hash in the buffer lengths - ADDQ ad_len+80(FP), R10 - ADCQ src_len+56(FP), R11 - ADCQ $0x01, R12 - MOVQ (BP), AX - MOVQ AX, R15 - MULQ R10 - MOVQ AX, R13 - MOVQ DX, R14 - MOVQ (BP), AX - MULQ R11 - IMULQ R12, R15 - ADDQ AX, R14 - ADCQ DX, R15 - MOVQ 8(BP), AX - MOVQ AX, R8 - MULQ R10 - ADDQ AX, R14 - ADCQ $0x00, DX - MOVQ DX, R10 - MOVQ 8(BP), AX - MULQ R11 - ADDQ AX, R15 - ADCQ $0x00, DX - IMULQ R12, R8 - ADDQ R10, R15 - ADCQ DX, R8 - MOVQ R13, R10 - MOVQ R14, R11 - MOVQ R15, R12 - ANDQ $0x03, R12 - MOVQ R15, R13 - ANDQ $-4, R13 - MOVQ R8, R14 - SHRQ $0x02, R8, R15 - SHRQ $0x02, R8 - ADDQ R13, R10 - ADCQ R14, R11 - ADCQ $0x00, R12 - ADDQ R15, R10 - ADCQ R8, R11 - ADCQ $0x00, R12 +DATA ·rol16<>+0(SB)/8, $0x0504070601000302 +DATA ·rol16<>+8(SB)/8, $0x0d0c0f0e09080b0a +DATA ·rol16<>+16(SB)/8, $0x0504070601000302 +DATA ·rol16<>+24(SB)/8, $0x0d0c0f0e09080b0a +GLOBL ·rol16<>(SB), RODATA|NOPTR, $32 - // Final reduce - MOVQ R10, R13 - MOVQ R11, R14 - MOVQ R12, R15 - SUBQ $-5, R10 - SBBQ $-1, R11 - SBBQ $0x03, R12 - CMOVQCS R13, R10 - CMOVQCS R14, R11 - CMOVQCS R15, R12 +DATA ·rol8<>+0(SB)/8, $0x0605040702010003 +DATA ·rol8<>+8(SB)/8, $0x0e0d0c0f0a09080b +DATA ·rol8<>+16(SB)/8, $0x0605040702010003 +DATA ·rol8<>+24(SB)/8, $0x0e0d0c0f0a09080b +GLOBL ·rol8<>(SB), RODATA|NOPTR, $32 - // Add in the "s" part of the key - ADDQ 16(BP), R10 - ADCQ 24(BP), R11 +DATA ·polyClampMask<>+0(SB)/8, $0x0ffffffc0fffffff +DATA ·polyClampMask<>+8(SB)/8, $0x0ffffffc0ffffffc +DATA ·polyClampMask<>+16(SB)/8, $0xffffffffffffffff +DATA ·polyClampMask<>+24(SB)/8, $0xffffffffffffffff +GLOBL ·polyClampMask<>(SB), RODATA|NOPTR, $32 - // Finally store the tag at the end of the message - MOVQ R10, (DI) - MOVQ R11, 8(DI) - RET +DATA ·avx2IncMask<>+0(SB)/8, $0x0000000000000002 +DATA ·avx2IncMask<>+8(SB)/8, $0x0000000000000000 +DATA ·avx2IncMask<>+16(SB)/8, $0x0000000000000002 +DATA ·avx2IncMask<>+24(SB)/8, $0x0000000000000000 +GLOBL ·avx2IncMask<>(SB), RODATA|NOPTR, $32 + +DATA ·andMask<>+0(SB)/8, $0x00000000000000ff +DATA ·andMask<>+8(SB)/8, $0x0000000000000000 +DATA ·andMask<>+16(SB)/8, $0x000000000000ffff +DATA ·andMask<>+24(SB)/8, $0x0000000000000000 +DATA ·andMask<>+32(SB)/8, $0x0000000000ffffff +DATA ·andMask<>+40(SB)/8, $0x0000000000000000 +DATA ·andMask<>+48(SB)/8, $0x00000000ffffffff +DATA ·andMask<>+56(SB)/8, $0x0000000000000000 +DATA ·andMask<>+64(SB)/8, $0x000000ffffffffff +DATA ·andMask<>+72(SB)/8, $0x0000000000000000 +DATA ·andMask<>+80(SB)/8, $0x0000ffffffffffff +DATA ·andMask<>+88(SB)/8, $0x0000000000000000 +DATA ·andMask<>+96(SB)/8, $0x00ffffffffffffff +DATA ·andMask<>+104(SB)/8, $0x0000000000000000 +DATA ·andMask<>+112(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+120(SB)/8, $0x0000000000000000 +DATA ·andMask<>+128(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+136(SB)/8, $0x00000000000000ff +DATA ·andMask<>+144(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+152(SB)/8, $0x000000000000ffff +DATA ·andMask<>+160(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+168(SB)/8, $0x0000000000ffffff +DATA ·andMask<>+176(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+184(SB)/8, $0x00000000ffffffff +DATA ·andMask<>+192(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+200(SB)/8, $0x000000ffffffffff +DATA ·andMask<>+208(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+216(SB)/8, $0x0000ffffffffffff +DATA ·andMask<>+224(SB)/8, $0xffffffffffffffff +DATA ·andMask<>+232(SB)/8, $0x00ffffffffffffff +GLOBL ·andMask<>(SB), RODATA|NOPTR, $240 -chacha20Poly1305Seal_AVX2: +// func chacha20Poly1305Seal(dst []byte, key []uint32, src []byte, ad []byte) +// Requires: AVX, AVX2, BMI2, CMOV, SSE2 +TEXT ·chacha20Poly1305Seal(SB), $288-96 + MOVQ SP, BP + ADDQ $0x20, BP + ANDQ $-32, BP + MOVQ dst_base+0(FP), DI + MOVQ key_base+24(FP), R8 + MOVQ src_base+48(FP), SI + MOVQ src_len+56(FP), BX + MOVQ ad_base+72(FP), CX VZEROUPPER - VMOVDQU ·chacha20Constants<>+0(SB), Y0 - BYTE $0xc4 - BYTE $0x42 - BYTE $0x7d - BYTE $0x5a - BYTE $0x70 - BYTE $0x10 - BYTE $0xc4 - BYTE $0x42 - BYTE $0x7d - BYTE $0x5a - BYTE $0x60 - BYTE $0x20 - BYTE $0xc4 - BYTE $0xc2 - BYTE $0x7d - BYTE $0x5a - BYTE $0x60 - BYTE $0x30 - VPADDD ·avx2InitMask<>+0(SB), Y4, Y4 + VMOVDQU ·chacha20Constants<>+0(SB), Y0 + VBROADCASTI128 16(R8), Y14 + VBROADCASTI128 32(R8), Y12 + VBROADCASTI128 48(R8), Y4 + VPADDD ·avx2InitMask<>+0(SB), Y4, Y4 // Special optimizations, for very short buffers CMPQ BX, $0x000000c0 @@ -8170,6 +3500,144 @@ sealAVX2InternalLoopStart: JBE sealAVX2Tail384 JMP sealAVX2Tail512 +sealSSETail: + TESTQ BX, BX + JE sealSSEFinalize + + // We can only load the PT one byte at a time to avoid read after end of buffer + MOVQ BX, R9 + SHLQ $0x04, R9 + LEAQ ·andMask<>+0(SB), R13 + MOVQ BX, CX + LEAQ -1(SI)(BX*1), SI + XORQ R15, R15 + XORQ R8, R8 + XORQ AX, AX + +sealSSETailLoadLoop: + SHLQ $0x08, R15, R8 + SHLQ $0x08, R15 + MOVB (SI), AX + XORQ AX, R15 + LEAQ -1(SI), SI + DECQ CX + JNE sealSSETailLoadLoop + MOVQ R15, 64(BP) + MOVQ R8, 72(BP) + PXOR 64(BP), X1 + MOVOU X1, (DI) + MOVOU -16(R13)(R9*1), X12 + PAND X12, X1 + MOVQ X1, R13 + PSRLDQ $0x08, X1 + MOVQ X1, R14 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + ADDQ BX, DI + +sealSSEFinalize: + // Hash in the buffer lengths + ADDQ ad_len+80(FP), R10 + ADCQ src_len+56(FP), R11 + ADCQ $0x01, R12 + MOVQ (BP), AX + MOVQ AX, R15 + MULQ R10 + MOVQ AX, R13 + MOVQ DX, R14 + MOVQ (BP), AX + MULQ R11 + IMULQ R12, R15 + ADDQ AX, R14 + ADCQ DX, R15 + MOVQ 8(BP), AX + MOVQ AX, R8 + MULQ R10 + ADDQ AX, R14 + ADCQ $0x00, DX + MOVQ DX, R10 + MOVQ 8(BP), AX + MULQ R11 + ADDQ AX, R15 + ADCQ $0x00, DX + IMULQ R12, R8 + ADDQ R10, R15 + ADCQ DX, R8 + MOVQ R13, R10 + MOVQ R14, R11 + MOVQ R15, R12 + ANDQ $0x03, R12 + MOVQ R15, R13 + ANDQ $-4, R13 + MOVQ R8, R14 + SHRQ $0x02, R8, R15 + SHRQ $0x02, R8 + ADDQ R13, R10 + ADCQ R14, R11 + ADCQ $0x00, R12 + ADDQ R15, R10 + ADCQ R8, R11 + ADCQ $0x00, R12 + + // Final reduce + MOVQ R10, R13 + MOVQ R11, R14 + MOVQ R12, R15 + SUBQ $-5, R10 + SBBQ $-1, R11 + SBBQ $0x03, R12 + CMOVQCS R13, R10 + CMOVQCS R14, R11 + CMOVQCS R15, R12 + + // Add in the "s" part of the key + ADDQ 16(BP), R10 + ADCQ 24(BP), R11 + + // Finally store the tag at the end of the message + MOVQ R10, (DI) + MOVQ R11, 8(DI) + RET + seal192AVX2: VMOVDQA Y0, Y5 VMOVDQA Y14, Y9 diff --git a/vendor/golang.org/x/net/http2/server_wrap.go b/vendor/golang.org/x/net/http2/server_wrap.go index a7a09551c4..737f1f0573 100644 --- a/vendor/golang.org/x/net/http2/server_wrap.go +++ b/vendor/golang.org/x/net/http2/server_wrap.go @@ -10,9 +10,11 @@ package http2 import ( "context" + "crypto/tls" "errors" "net" "net/http" + "slices" "sync" "time" ) @@ -44,6 +46,20 @@ func configureServer(s *http.Server, conf *Server) error { h2.IdleTimeout = h1.ReadTimeout } } + + // Register h2 and http/1.1 ALPN protocols on s.TLSConfig, matching + // the pre-wrapping implementation in server.go, so that TLS listeners + // built from s.TLSConfig still negotiate HTTP/2. + if s.TLSConfig == nil { + s.TLSConfig = new(tls.Config) + } + if !slices.Contains(s.TLSConfig.NextProtos, NextProtoTLS) { + s.TLSConfig.NextProtos = append(s.TLSConfig.NextProtos, NextProtoTLS) + } + if !slices.Contains(s.TLSConfig.NextProtos, "http/1.1") { + s.TLSConfig.NextProtos = append(s.TLSConfig.NextProtos, "http/1.1") + } + conf.state = &serverInternalState{ s1: s, } diff --git a/vendor/golang.org/x/net/http2/transport_wrap.go b/vendor/golang.org/x/net/http2/transport_wrap.go index d25d99bdbb..eab2e6b073 100644 --- a/vendor/golang.org/x/net/http2/transport_wrap.go +++ b/vendor/golang.org/x/net/http2/transport_wrap.go @@ -22,8 +22,8 @@ import ( ) func configureTransport(t1 *http.Transport) error { - // ConfigureTransport is a no-op: The http.Transport already supports HTTP/2. - return nil + _, err := configureTransports(t1) + return err } func configureTransports(t1 *http.Transport) (*Transport, error) { @@ -31,6 +31,17 @@ func configureTransports(t1 *http.Transport) (*Transport, error) { // linked to the http.Transport's. tr2 := &Transport{} tr2.configure(t1) + // Enable HTTP/2 on the transport, as the pre-wrapping implementation did: + // net/http does not auto-enable it for a transport with a custom + // TLSClientConfig or dialer. + if t1.TLSClientConfig == nil { + t1.TLSClientConfig = &tls.Config{} + } + if t1.Protocols == nil { + t1.Protocols = new(http.Protocols) + t1.Protocols.SetHTTP1(true) + } + t1.Protocols.SetHTTP2(true) return tr2, nil } diff --git a/vendor/golang.org/x/sync/errgroup/errgroup.go b/vendor/golang.org/x/sync/errgroup/errgroup.go index f69fd75468..c261a8ebbd 100644 --- a/vendor/golang.org/x/sync/errgroup/errgroup.go +++ b/vendor/golang.org/x/sync/errgroup/errgroup.go @@ -109,7 +109,7 @@ func (g *Group) TryGo(f func() error) bool { if g.sem != nil { select { case g.sem <- token{}: - // Note: this allows barging iff channels in general allow barging. + // Note: this allows barging if and only if channels in general allow barging. default: return false } diff --git a/vendor/golang.org/x/sync/semaphore/semaphore.go b/vendor/golang.org/x/sync/semaphore/semaphore.go index b618162aab..040c5bc509 100644 --- a/vendor/golang.org/x/sync/semaphore/semaphore.go +++ b/vendor/golang.org/x/sync/semaphore/semaphore.go @@ -83,7 +83,7 @@ func (s *Weighted) Acquire(ctx context.Context, n int64) error { default: isFront := s.waiters.Front() == elem s.waiters.Remove(elem) - // If we're at the front and there're extra tokens left, notify other waiters. + // If we're at the front and there are extra tokens left, notify other waiters. if isFront && s.size > s.cur { s.notifyWaiters() } @@ -139,15 +139,15 @@ func (s *Weighted) notifyWaiters() { w := next.Value.(waiter) if s.size-s.cur < w.n { - // Not enough tokens for the next waiter. We could keep going (to try to + // Not enough tokens for the next waiter. We could keep going (to try to // find a waiter with a smaller request), but under load that could cause // starvation for large requests; instead, we leave all remaining waiters // blocked. // // Consider a semaphore used as a read-write lock, with N tokens, N - // readers, and one writer. Each reader can Acquire(1) to obtain a read - // lock. The writer can Acquire(N) to obtain a write lock, excluding all - // of the readers. If we allow the readers to jump ahead in the queue, + // readers, and one writer. Each reader can Acquire(1) to obtain a read + // lock. The writer can Acquire(N) to obtain a write lock, excluding all + // of the readers. If we allow the readers to jump ahead in the queue, // the writer will starve — there is always one token available for every // reader. break diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go index d11d5b96a4..526a0d5f43 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go @@ -6397,3 +6397,79 @@ const ( MPOL_PREFERRED_MANY = 0x5 MPOL_WEIGHTED_INTERLEAVE = 0x6 ) + +const ( + GPIO_V2_GET_LINEINFO_IOCTL = 0xc100b405 + GPIO_V2_GET_LINE_IOCTL = 0xc250b407 + GPIO_V2_LINE_GET_VALUES_IOCTL = 0xc010b40e + GPIO_V2_LINE_SET_VALUES_IOCTL = 0xc010b40f + GPIO_V2_GET_LINEINFO_WATCH_IOCTL = 0xc100b406 + GPIO_GET_LINEINFO_UNWATCH_IOCTL = 0xc004b40c +) +const ( + GPIO_V2_LINE_ATTR_ID_FLAGS = 0x1 + GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES = 0x2 + GPIO_V2_LINE_ATTR_ID_DEBOUNCE = 0x3 + GPIO_V2_LINE_CHANGED_REQUESTED = 0x1 + GPIO_V2_LINE_CHANGED_RELEASED = 0x2 + GPIO_V2_LINE_CHANGED_CONFIG = 0x3 + GPIO_V2_LINE_EVENT_RISING_EDGE = 0x1 + GPIO_V2_LINE_EVENT_FALLING_EDGE = 0x2 +) + +type GPIOChipInfo struct { + Name [32]byte + Label [32]byte + Lines uint32 +} +type GPIOV2LineValues struct { + Bits uint64 + Mask uint64 +} +type GPIOV2LineAttribute struct { + Id uint32 + _ uint32 + Flags uint64 +} +type GPIOV2LineConfigAttribute struct { + Attr GPIOV2LineAttribute + Mask uint64 +} +type GPIOV2LineConfig struct { + Flags uint64 + Num_attrs uint32 + _ [5]uint32 + Attrs [10]GPIOV2LineConfigAttribute +} +type GPIOV2LineRequest struct { + Offsets [64]uint32 + Consumer [32]byte + Config GPIOV2LineConfig + Num_lines uint32 + Event_buffer_size uint32 + _ [5]uint32 + Fd int32 +} +type GPIOV2LineInfo struct { + Name [32]byte + Consumer [32]byte + Offset uint32 + Num_attrs uint32 + Flags uint64 + Attrs [10]GPIOV2LineAttribute + _ [4]uint32 +} +type GPIOV2LineInfoChanged struct { + Info GPIOV2LineInfo + Timestamp_ns uint64 + Event_type uint32 + _ [5]uint32 +} +type GPIOV2LineEvent struct { + Timestamp_ns uint64 + Id uint32 + Offset uint32 + Seqno uint32 + Line_seqno uint32 + _ [6]uint32 +} diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_386.go b/vendor/golang.org/x/sys/unix/ztypes_linux_386.go index 97ef790deb..aede1de7f2 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_386.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_386.go @@ -711,3 +711,7 @@ type SysvShmDesc struct { _ uint32 _ uint32 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x8044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_amd64.go index 90b50da680..bb3bc4dc2c 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_amd64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_amd64.go @@ -725,3 +725,7 @@ type SysvShmDesc struct { _ uint64 _ uint64 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x8044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_arm.go b/vendor/golang.org/x/sys/unix/ztypes_linux_arm.go index acda136851..1fdf4c5175 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_arm.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_arm.go @@ -705,3 +705,7 @@ type SysvShmDesc struct { _ uint32 _ uint32 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x8044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_arm64.go index ef7a99e1f9..063e6f0b41 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_arm64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_arm64.go @@ -704,3 +704,7 @@ type SysvShmDesc struct { _ uint64 _ uint64 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x8044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_loong64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_loong64.go index 966063dfc1..9cf836c708 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_loong64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_loong64.go @@ -705,3 +705,7 @@ type SysvShmDesc struct { _ uint64 _ uint64 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x8044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_mips.go b/vendor/golang.org/x/sys/unix/ztypes_linux_mips.go index dc53b20b74..1d222fcb31 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_mips.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_mips.go @@ -710,3 +710,7 @@ type SysvShmDesc struct { Ctime_high uint16 _ uint16 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x4044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_mips64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_mips64.go index 9ad0aa8c31..912cc4ab63 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_mips64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_mips64.go @@ -707,3 +707,7 @@ type SysvShmDesc struct { _ uint64 _ uint64 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x4044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_mips64le.go b/vendor/golang.org/x/sys/unix/ztypes_linux_mips64le.go index 29d55493d5..1e358ef34f 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_mips64le.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_mips64le.go @@ -707,3 +707,7 @@ type SysvShmDesc struct { _ uint64 _ uint64 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x4044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_mipsle.go b/vendor/golang.org/x/sys/unix/ztypes_linux_mipsle.go index a4d9e15848..df59f32f5e 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_mipsle.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_mipsle.go @@ -710,3 +710,7 @@ type SysvShmDesc struct { Ctime_high uint16 _ uint16 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x4044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc.go b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc.go index f8a2977716..29355aa0bf 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc.go @@ -718,3 +718,7 @@ type SysvShmDesc struct { _ uint32 _ [4]byte } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x4044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64.go index 4158d6c4ee..c6083a15d7 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64.go @@ -713,3 +713,7 @@ type SysvShmDesc struct { _ uint64 _ uint64 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x4044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64le.go index 1035af49f7..6321cc7626 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64le.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_ppc64le.go @@ -713,3 +713,7 @@ type SysvShmDesc struct { _ uint64 _ uint64 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x4044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go index 2297125d3c..b44f402feb 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_riscv64.go @@ -792,3 +792,7 @@ const ( RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE = 0x6 RISCV_HWPROBE_WHICH_CPUS = 0x1 ) + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x8044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_s390x.go b/vendor/golang.org/x/sys/unix/ztypes_linux_s390x.go index 8481e9bd98..b22c795a64 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_s390x.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_s390x.go @@ -727,3 +727,7 @@ type SysvShmDesc struct { _ uint64 _ uint64 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x8044b401 +) diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux_sparc64.go b/vendor/golang.org/x/sys/unix/ztypes_linux_sparc64.go index a6828a0310..0b18075b53 100644 --- a/vendor/golang.org/x/sys/unix/ztypes_linux_sparc64.go +++ b/vendor/golang.org/x/sys/unix/ztypes_linux_sparc64.go @@ -708,3 +708,7 @@ type SysvShmDesc struct { _ uint64 _ uint64 } + +const ( + GPIO_GET_CHIPINFO_IOCTL = 0x4044b401 +) diff --git a/vendor/golang.org/x/tools/go/packages/packages.go b/vendor/golang.org/x/tools/go/packages/packages.go index de683684ab..1e5549abe4 100644 --- a/vendor/golang.org/x/tools/go/packages/packages.go +++ b/vendor/golang.org/x/tools/go/packages/packages.go @@ -815,6 +815,12 @@ func (ld *loader) refine(response *DriverResponse) ([]*Package, error) { needsrc: needsrc, goVersion: response.GoVersion, } + // Don't trust the driver to respond with duplicate-free + // package names (go.dev/issue/63822). + if _, ok := ld.pkgs[lpkg.ID]; ok { + return nil, fmt.Errorf("%s response contained duplicate packages for ID %q", + cond(ld.externalDriver, "go/packages driver", "go list"), lpkg.ID) + } ld.pkgs[lpkg.ID] = lpkg if rootIndex >= 0 { initial[rootIndex] = lpkg @@ -1589,3 +1595,11 @@ func usesExportData(cfg *Config) bool { } type unit struct{} + +func cond[T any](cond bool, t, f T) T { + if cond { + return t + } else { + return f + } +} diff --git a/vendor/golang.org/x/tools/internal/gcimporter/iexport.go b/vendor/golang.org/x/tools/internal/gcimporter/iexport.go index 4c9450f4ee..686f171d3b 100644 --- a/vendor/golang.org/x/tools/internal/gcimporter/iexport.go +++ b/vendor/golang.org/x/tools/internal/gcimporter/iexport.go @@ -823,6 +823,9 @@ func (p *iexporter) doDecl(obj types.Object) { w.pos(m.Pos()) w.string(m.Name()) sig, _ := m.Type().(*types.Signature) + if w.p.version >= iexportVersionGenericMethods && w.bool(sig.TypeParams().Len() > 0) { + w.tparamList(obj.Name()+"."+m.Name(), sig.TypeParams(), obj.Pkg()) + } // Receiver type parameters are type arguments of the receiver type, so // their name must be qualified before exporting recv. diff --git a/vendor/golang.org/x/tools/internal/gcimporter/iimport.go b/vendor/golang.org/x/tools/internal/gcimporter/iimport.go index 1ee4e93549..7b1723e0fc 100644 --- a/vendor/golang.org/x/tools/internal/gcimporter/iimport.go +++ b/vendor/golang.org/x/tools/internal/gcimporter/iimport.go @@ -48,13 +48,14 @@ func (r *intReader) uint64() uint64 { // Keep this in sync with constants in iexport.go. const ( - iexportVersionGo1_11 = 0 - iexportVersionPosCol = 1 - iexportVersionGo1_18 = 2 - iexportVersionGenerics = 2 - iexportVersion = iexportVersionGenerics - - iexportVersionCurrent = 2 + iexportVersionGo1_11 = 0 + iexportVersionPosCol = 1 + iexportVersionGo1_18 = 2 + iexportVersionGenerics = 2 + iexportVersionGenericMethods = 3 + iexportVersion = iexportVersionGenericMethods + + iexportVersionCurrent = 3 ) type ident struct { @@ -179,9 +180,9 @@ func iimportCommon(fset *token.FileSet, getPackages GetPackagesFunc, data []byte version = int64(r.uint64()) switch version { - case iexportVersionGo1_18, iexportVersionPosCol, iexportVersionGo1_11: + case iexportVersionGenericMethods, iexportVersionGo1_18, iexportVersionPosCol, iexportVersionGo1_11: default: - if version > iexportVersionGo1_18 { + if version > iexportVersionGenericMethods { errorf("unstable iexport format version %d, just rebuild compiler and std library", version) } else { errorf("unknown iexport format version %d", version) @@ -614,6 +615,10 @@ func (r *importReader) obj(pkg *types.Package, name string) { for n := r.uint64(); n > 0; n-- { mpos := r.pos() mname := r.ident() + var tpars []*types.TypeParam + if r.p.version >= iexportVersionGenericMethods && r.bool() { + tpars = r.tparamList() + } recv := r.param(pkg) // If the receiver has any targs, set those as the @@ -628,8 +633,7 @@ func (r *importReader) obj(pkg *types.Package, name string) { rparams[i] = types.Unalias(targs.At(i)).(*types.TypeParam) } } - msig := r.signature(pkg, recv, rparams, nil) - + msig := r.signature(pkg, recv, rparams, tpars) named.AddMethod(types.NewFunc(mpos, pkg, mname, msig)) } } diff --git a/vendor/golang.org/x/tools/internal/imports/fix.go b/vendor/golang.org/x/tools/internal/imports/fix.go index d29ecfb017..b99ea6c914 100644 --- a/vendor/golang.org/x/tools/internal/imports/fix.go +++ b/vendor/golang.org/x/tools/internal/imports/fix.go @@ -273,7 +273,6 @@ func (p *pass) loadPackageNames(ctx context.Context, imports []*ImportInfo) erro } unknown = append(unknown, imp.ImportPath) } - names, err := p.source.LoadPackageNames(ctx, p.srcDir, unknown) if err != nil { return err diff --git a/vendor/golang.org/x/tools/internal/imports/imports.go b/vendor/golang.org/x/tools/internal/imports/imports.go index b5f5218b5c..072293835d 100644 --- a/vendor/golang.org/x/tools/internal/imports/imports.go +++ b/vendor/golang.org/x/tools/internal/imports/imports.go @@ -74,6 +74,10 @@ func Process(filename string, src []byte, opt *Options) (formatted []byte, err e // Note that filename's directory influences which imports can be chosen, // so it is important that filename be accurate. func FixImports(ctx context.Context, filename string, src []byte, goroot string, logf func(string, ...any), source Source) (fixes []*ImportFix, err error) { + if source == nil { + // In case someone adds a defective call from a new place + panic("source is nil") + } ctx, done := event.Start(ctx, "imports.FixImports") defer done() diff --git a/vendor/golang.org/x/tools/internal/typesinternal/element.go b/vendor/golang.org/x/tools/internal/typesinternal/element.go index 5fe4d8abcb..89eeea1653 100644 --- a/vendor/golang.org/x/tools/internal/typesinternal/element.go +++ b/vendor/golang.org/x/tools/internal/typesinternal/element.go @@ -37,6 +37,10 @@ func ForEachElement(rtypes *typeutil.Map, msets *typeutil.MethodSetCache, T type tmset := msets.MethodSet(T) for method := range tmset.Methods() { sig := method.Type().(*types.Signature) + if sig.TypeParams() != nil { + continue // skip type-parameterized methods + } + // It is tempting to call visit(sig, false) // but, as noted in golang.org/cl/65450043, // the Signature.Recv field is ignored by @@ -123,10 +127,10 @@ func ForEachElement(rtypes *typeutil.Map, msets *typeutil.MethodSetCache, T type case *types.TypeParam, *types.Union: // forEachReachable must not be called on parameterized types. - panic(T) + panic(fmt.Sprintf("ForEachElement called on type containing %T", T)) default: - panic(T) + panic(fmt.Sprintf("ForEachElement called on unexpected type %T", T)) } } visit(T, false) diff --git a/vendor/golang.org/x/tools/internal/typesinternal/types.go b/vendor/golang.org/x/tools/internal/typesinternal/types.go index 6582cc81f5..d2c0b4c5ff 100644 --- a/vendor/golang.org/x/tools/internal/typesinternal/types.go +++ b/vendor/golang.org/x/tools/internal/typesinternal/types.go @@ -22,6 +22,7 @@ import ( "go/ast" "go/token" "go/types" + "iter" "reflect" "golang.org/x/tools/go/ast/inspector" @@ -242,3 +243,30 @@ func ObjectKind(obj types.Object) string { } return "unknown symbol" } + +// ImplicitFieldSelections returns the sequence of implicit embedded fields +// traversed by the given selection. It skips the final leaf field or method. +// The boolean component indicates whether the traversal traversed a pointer. +func ImplicitFieldSelections(seln types.Selection) iter.Seq2[*types.Var, bool] { + return func(yield func(*types.Var, bool) bool) { + var ( + t = seln.Recv() + indices = seln.Index() + ) + for _, idx := range indices[:len(indices)-1] { + ptr, isPtr := t.Underlying().(*types.Pointer) + if isPtr { + t = ptr.Elem() + } + structType, ok := t.Underlying().(*types.Struct) + if !ok { + break + } + field := structType.Field(idx) + if !yield(field, isPtr) { + break + } + t = field.Type() + } + } +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 0e24910c12..7bf302ef77 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -107,7 +107,7 @@ github.com/davecgh/go-spew/spew # github.com/distribution/reference v0.6.0 ## explicit; go 1.20 github.com/distribution/reference -# github.com/docker/cli v29.4.3+incompatible +# github.com/docker/cli v29.5.3+incompatible ## explicit github.com/docker/cli/cli/config github.com/docker/cli/cli/config/configfile @@ -210,7 +210,7 @@ github.com/google/go-cmp/cmp/internal/diff github.com/google/go-cmp/cmp/internal/flags github.com/google/go-cmp/cmp/internal/function github.com/google/go-cmp/cmp/internal/value -# github.com/google/go-containerregistry v0.21.6 +# github.com/google/go-containerregistry v0.21.7 ## explicit; go 1.25.0 github.com/google/go-containerregistry/cmd/crane github.com/google/go-containerregistry/cmd/crane/cmd @@ -220,6 +220,7 @@ github.com/google/go-containerregistry/internal/compression github.com/google/go-containerregistry/internal/editor github.com/google/go-containerregistry/internal/gzip github.com/google/go-containerregistry/internal/httptest +github.com/google/go-containerregistry/internal/limit github.com/google/go-containerregistry/internal/redact github.com/google/go-containerregistry/internal/retry github.com/google/go-containerregistry/internal/retry/wait @@ -582,7 +583,7 @@ go.yaml.in/yaml/v2 # go.yaml.in/yaml/v3 v3.0.4 ## explicit; go 1.16 go.yaml.in/yaml/v3 -# golang.org/x/crypto v0.51.0 +# golang.org/x/crypto v0.53.0 ## explicit; go 1.25.0 golang.org/x/crypto/chacha20 golang.org/x/crypto/chacha20poly1305 @@ -599,7 +600,7 @@ golang.org/x/exp/maps golang.org/x/mod/internal/lazyregexp golang.org/x/mod/module golang.org/x/mod/semver -# golang.org/x/net v0.55.0 +# golang.org/x/net v0.56.0 ## explicit; go 1.25.0 golang.org/x/net/context/ctxhttp golang.org/x/net/http/httpguts @@ -622,21 +623,21 @@ golang.org/x/oauth2/google/internal/stsexchange golang.org/x/oauth2/internal golang.org/x/oauth2/jws golang.org/x/oauth2/jwt -# golang.org/x/sync v0.20.0 +# golang.org/x/sync v0.21.0 ## explicit; go 1.25.0 golang.org/x/sync/errgroup golang.org/x/sync/semaphore -# golang.org/x/sys v0.45.0 +# golang.org/x/sys v0.46.0 ## explicit; go 1.25.0 golang.org/x/sys/cpu golang.org/x/sys/plan9 golang.org/x/sys/unix golang.org/x/sys/windows golang.org/x/sys/windows/registry -# golang.org/x/term v0.43.0 +# golang.org/x/term v0.44.0 ## explicit; go 1.25.0 golang.org/x/term -# golang.org/x/text v0.37.0 +# golang.org/x/text v0.38.0 ## explicit; go 1.25.0 golang.org/x/text/cases golang.org/x/text/encoding @@ -657,7 +658,7 @@ golang.org/x/text/unicode/norm # golang.org/x/time v0.15.0 ## explicit; go 1.25.0 golang.org/x/time/rate -# golang.org/x/tools v0.45.0 +# golang.org/x/tools v0.46.0 ## explicit; go 1.25.0 golang.org/x/tools/go/ast/astutil golang.org/x/tools/go/ast/edge