Reduced vulnerability processing time by grouping vulnerability dictionary by vendor. (#17298)

Reduced vulnerability processing time by grouping the vulnerability
dictionary by vendor.
#16858 

Notice `cvefeed.(*Cache).Get`
Before:
<img width="1997" alt="image"
src="https://github.com/fleetdm/fleet/assets/2685025/a3cc5571-cee2-40a6-8b2d-cfed1323d96c">
After:

![image](https://github.com/fleetdm/fleet/assets/2685025/89484f87-a4e9-4c5c-943f-138e1c27a2c6)


# Checklist for submitter

<!-- Note that API documentation changes are now addressed by the
product design team. -->

- [x] Changes file added for user-visible changes in `changes/` or
`orbit/changes/`.
See [Changes
files](https://fleetdm.com/docs/contributing/committing-changes#changes-files)
for more information.
- [ ] Added/updated tests
- [x] Manual QA for all new/changed functionality
This commit is contained in:
Victor Lyuboslavsky 2024-03-04 16:16:32 -06:00 committed by GitHub
parent 960a7a350a
commit 7bbb4a64bb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 30 additions and 7 deletions

View File

@ -0,0 +1 @@
Reduced vulnerability processing time by grouping the vulnerability dictionary by vendor.

View File

@ -225,11 +225,9 @@ func TranslateCPEToCVE(
foundSoftwareVulns, foundOSVulns, err := checkCVEs(
ctx,
ds,
logger,
interfaceParsed,
file,
collectVulns,
knownNVDBugRules,
)
if err != nil {
@ -339,11 +337,9 @@ func matchesExactTargetSW(softwareCPETargetSW string, targetSWs []string, config
func checkCVEs(
ctx context.Context,
ds fleet.Datastore,
logger kitlog.Logger,
CPEItems []itemWithNVDMeta,
jsonFile string,
collectVulns bool,
knownNVDBugRules CPEMatchingRules,
) ([]fleet.SoftwareVulnerability, []fleet.OSVulnerability, error) {
dict, err := cvefeed.LoadJSONDictionary(jsonFile)
@ -351,9 +347,30 @@ func checkCVEs(
return nil, nil, err
}
cache := cvefeed.NewCache(dict).SetRequireVersion(true).SetMaxSize(-1)
// This index consumes too much RAM
// cache.Idx = cvefeed.NewIndex(dict)
// Group the dictionary by vendor using a map.
// This is done to speed up the matching process (PR https://github.com/fleetdm/fleet/pull/17298).
// Each vendor key maps to a sub-dictionary holding only the vulnerabilities whose Config() attributes include that vendor;
// a vulnerability that lists several vendors is added to the sub-dictionary of each of them.
// Because a map is backed by a hash table, looking up a CPE's vendor is cheap, so we no longer need to
// compare each CPE with each vulnerability, but only with the vulnerabilities that have the same vendor.
// Further optimization can be done by also using a map for product name comparison.
dictGrouped := make(map[string]cvefeed.Dictionary, len(dict))
for key, vuln := range dict {
attrsArray := vuln.Config()
for _, attrs := range attrsArray {
subDict, ok := dictGrouped[attrs.Vendor]
if !ok {
subDict = make(cvefeed.Dictionary, 1)
dictGrouped[attrs.Vendor] = subDict
}
subDict[key] = vuln
}
}
cacheGrouped := make(map[string]*cvefeed.Cache, len(dictGrouped))
for vendor, subDict := range dictGrouped {
cache := cvefeed.NewCache(subDict).SetRequireVersion(true).SetMaxSize(-1)
cacheGrouped[vendor] = cache
}
CPEItemCh := make(chan itemWithNVDMeta)
var foundSoftwareVulns []fleet.SoftwareVulnerability
@ -382,6 +399,11 @@ func checkCVEs(
return
}
cache, ok := cacheGrouped[CPEItem.GetMeta().Vendor]
if !ok {
// No such vendor in the Vulnerability dictionary
continue
}
cacheHits := cache.Get([]*wfn.Attributes{CPEItem.GetMeta()})
for _, matches := range cacheHits {
if len(matches.CPEs) == 0 {