From 0709d1bc5c1f59a710b4e69683c949ac39e725e7 Mon Sep 17 00:00:00 2001 From: Michal Nicpon <39177923+michalnicp@users.noreply.github.com> Date: Thu, 1 Sep 2022 10:02:07 -0600 Subject: [PATCH] improve vuln cpe matching on macos (#6985) * add cpe translations * fix matching on target_sw --- changes/issue-6628-macos-vuln | 5 + cmd/fleet/cron.go | 8 +- cmd/fleetctl/get_test.go | 8 +- cmd/fleetctl/vulnerability_data_stream.go | 16 +- .../vulnerability_data_stream_test.go | 1 + docs/Deploying/Configuration.md | 28 +- docs/Using-Fleet/Vulnerability-Processing.md | 66 ++- server/config/config.go | 6 +- server/config/config_test.go | 3 +- server/fleet/app.go | 1 + server/service/service_appconfig.go | 1 + server/vulnerabilities/cpe.go | 529 ++++++++++++++---- server/vulnerabilities/cpe_test.go | 184 +++++- server/vulnerabilities/cpe_translations.json | 33 ++ server/vulnerabilities/db.go | 101 ++-- server/vulnerabilities/sync.go | 21 +- server/vulnerabilities/sync_test.go | 13 + 17 files changed, 830 insertions(+), 194 deletions(-) create mode 100644 changes/issue-6628-macos-vuln create mode 100644 server/vulnerabilities/cpe_translations.json diff --git a/changes/issue-6628-macos-vuln b/changes/issue-6628-macos-vuln new file mode 100644 index 000000000..98feffff9 --- /dev/null +++ b/changes/issue-6628-macos-vuln @@ -0,0 +1,5 @@ +* Improved vulnerability detection on macOS to decrease false positives and added the ability to define custom rules for matching software to CPEs for vulnerability processing. + When upgrading fleet, there are a few config options to consider or else vulnerability processing will stop working. + If the environment variable `FLEET_VULNERABILITIES_DISABLE_DATA_SYNC` is set to `true` or the flag `--vulnerabilities_disable_data_sync` is set, you must manually download the latest CPE database and CPE translations files and copy them to the vulnerabilities databases path that has been configured for fleet. 
+ If the environment variable `FLEET_VULNERABILITIES_CPE_DATABASE_URL` is set or the flag `--vulnerabilities_cpe_database_url` is set, you must ensure that the CPE database file the url points to has been updated to the latest version. + The latest CPE database and CPE translations files can be downloaded from [NVD Releases](https://github.com/fleetdm/nvd/releases/latest). diff --git a/cmd/fleet/cron.go b/cmd/fleet/cron.go index 397218ebc..c7de11bda 100644 --- a/cmd/fleet/cron.go +++ b/cmd/fleet/cron.go @@ -323,7 +323,13 @@ func checkNVDVulnerabilities( collectVulns bool, ) []fleet.SoftwareVulnerability { if !config.DisableDataSync { - err := vulnerabilities.Sync(vulnPath, config.CPEDatabaseURL) + opts := vulnerabilities.SyncOptions{ + VulnPath: config.DatabasesPath, + CPEDBURL: config.CPEDatabaseURL, + CPETranslationsURL: config.CPETranslationsURL, + CVEFeedPrefixURL: config.CVEFeedPrefixURL, + } + err := vulnerabilities.Sync(opts) if err != nil { errHandler(ctx, logger, "syncing vulnerability database", err) return nil diff --git a/cmd/fleetctl/get_test.go b/cmd/fleetctl/get_test.go index c68758234..2bf175a33 100644 --- a/cmd/fleetctl/get_test.go +++ b/cmd/fleetctl/get_test.go @@ -603,7 +603,7 @@ spec: }) t.Run("IncludeServerConfig", func(t *testing.T) { - expectedYaml := `--- + expectedYAML := `--- apiVersion: v1 kind: config spec: @@ -675,6 +675,7 @@ spec: osquery_policy: 1h0m0s vulnerabilities: cpe_database_url: "" + cpe_translations_url: "" current_instance_checks: "" cve_feed_prefix_url: "" databases_path: "" @@ -788,6 +789,7 @@ spec: "databases_path": "", "periodicity": "0s", "cpe_database_url": "", + "cpe_translations_url": "", "cve_feed_prefix_url": "", "current_instance_checks": "", "disable_data_sync": false, @@ -824,8 +826,8 @@ spec: } ` - assert.YAMLEq(t, expectedYaml, runAppForTest(t, []string{"get", "config", "--include-server-config"})) - assert.YAMLEq(t, expectedYaml, runAppForTest(t, []string{"get", "config", "--include-server-config", 
"--yaml"})) + assert.YAMLEq(t, expectedYAML, runAppForTest(t, []string{"get", "config", "--include-server-config"})) + assert.YAMLEq(t, expectedYAML, runAppForTest(t, []string{"get", "config", "--include-server-config", "--yaml"})) require.JSONEq(t, expectedJSON, runAppForTest(t, []string{"get", "config", "--include-server-config", "--json"})) }) } diff --git a/cmd/fleetctl/vulnerability_data_stream.go b/cmd/fleetctl/vulnerability_data_stream.go index 9075d7c0f..a17c6cb2c 100644 --- a/cmd/fleetctl/vulnerability_data_stream.go +++ b/cmd/fleetctl/vulnerability_data_stream.go @@ -40,10 +40,17 @@ Downloads (if needed) the data streams that can be used by the Fleet server to p if err != nil { return err } + client := fleethttp.NewClient() log(c, "[-] Downloading CPE database...") - client := fleethttp.NewClient() - err = vulnerabilities.DownloadCPEDatabase(dir, client) + err = vulnerabilities.DownloadCPEDB(dir, client, "") + if err != nil { + return err + } + log(c, " Done\n") + + log(c, "[-] Downloading CPE translations...") + err = vulnerabilities.DownloadCPETranslations(dir, client, "") if err != nil { return err } @@ -54,25 +61,22 @@ Downloads (if needed) the data streams that can be used by the Fleet server to p if err != nil { return err } - log(c, " Done\n") log(c, "[-] Downloading EPSS feed...") - err = vulnerabilities.DownloadEPSSFeed(dir, client) if err != nil { return err } - log(c, " Done\n") log(c, "[-] Downloading CISA known exploits feed...") - err = vulnerabilities.DownloadCISAKnownExploitsFeed(dir, client) if err != nil { return err } log(c, " Done\n") + log(c, "[-] Downloading Oval definitions...") err = oval.Sync(client, dir, nil) if err != nil { diff --git a/cmd/fleetctl/vulnerability_data_stream_test.go b/cmd/fleetctl/vulnerability_data_stream_test.go index 177dce7db..4894abc21 100644 --- a/cmd/fleetctl/vulnerability_data_stream_test.go +++ b/cmd/fleetctl/vulnerability_data_stream_test.go @@ -18,6 +18,7 @@ func TestVulnerabilityDataStream(t 
*testing.T) { vulnPath := t.TempDir() expectedOutput := `[-] Downloading CPE database... Done +[-] Downloading CPE translations... Done [-] Downloading NVD CVE feed... Done [-] Downloading EPSS feed... Done [-] Downloading CISA known exploits feed... Done diff --git a/docs/Deploying/Configuration.md b/docs/Deploying/Configuration.md index efaf8c893..a6ce6bed7 100644 --- a/docs/Deploying/Configuration.md +++ b/docs/Deploying/Configuration.md @@ -2056,7 +2056,10 @@ How often vulnerabilities are checked. This is also the interval at which the co ##### cpe_database_url -URL to fetch the CPE dictionary database from. Some users want to control where Fleet gets its database from. When Fleet sees this value defined, it downloads the file directly. It expects a file in the same format as can be found in https://github.com/fleetdm/nvd/releases. If this value is not defined, Fleet checks for the latest release in Github and only downloads it if needed. +You can fetch the CPE dictionary database from this URL. Some users want to control where Fleet gets its database. +When Fleet sees this value defined, it downloads the file directly. +It expects a file in the same format that can be found in https://github.com/fleetdm/nvd/releases. +If this value is not defined, Fleet checks for the latest release in Github and only downloads it if needed. - Default value: `""` - Environment variable: `FLEET_VULNERABILITIES_CPE_DATABASE_URL` @@ -2066,16 +2069,35 @@ URL to fetch the CPE dictionary database from. Some users want to control where cpe_database_url: "" ``` +##### cpe_translations_url + +You can fetch the CPE translations from this URL. +Translations are used when matching software to CPE entries in the CPE database that would otherwise be missed for various reasons. +When Fleet sees this value defined, it downloads the file directly. +It expects a file in the same format that can be found in https://github.com/fleetdm/nvd/releases. 
+If this value is not defined, Fleet checks for the latest release in Github and only downloads it if needed. + +- Default value: `""` +- Environment variable: `FLEET_VULNERABILITIES_CPE_TRANSLATIONS_URL` +- Config file format: + ``` + vulnerabilities: + cpe_translations_url: "" + ``` + ##### cve_feed_prefix_url -Similarly to the CPE dictionary, we allow users to define where to get the CVE feeds from. In this case, the url should be a host that serves the files in the path /feeds/json/cve/1.1/. Fleet expects to find there all the JSON Feeds that can be found in https://nvd.nist.gov/vuln/data-feeds. When not defined, Fleet downloads from the nvd.nist.gov host. +Like the CPE dictionary, we allow users to define where to get the CVE feeds. +In this case, the URL should be a host that serves the files in the path /feeds/json/cve/1.1/. +Fleet expects to find all the JSON Feeds that can be found in https://nvd.nist.gov/vuln/data-feeds. +When not defined, Fleet downloads from the nvd.nist.gov host. - Default value: `""` - Environment variable: `FLEET_VULNERABILITIES_CVE_FEED_PREFIX_URL` - Config file format: ``` vulnerabilities: - cve_database_url: "" + cve_feed_prefix_url: "" ``` ##### current_instance_checks diff --git a/docs/Using-Fleet/Vulnerability-Processing.md b/docs/Using-Fleet/Vulnerability-Processing.md index d1de7a232..e58fe13af 100644 --- a/docs/Using-Fleet/Vulnerability-Processing.md +++ b/docs/Using-Fleet/Vulnerability-Processing.md @@ -222,8 +222,6 @@ The whole pipeline exists to compensate for these differences, and it can be div parse --> execute ``` - - ### Ingesting software lists from hosts The ingestion of software varies per platform. We run a `UNION` of several queries in each: @@ -244,13 +242,15 @@ As described briefly above, we do this by translating the NVD database of CPEs i #### How accurate is this translation process? -This is the most error prone part of the process. The CPE can have some vagueness. 
This means that parts of it can be a `*`, which means when you match that CPE to a CVE it can match any of that part of the CPE. +This is the most error prone part of the process. +The CPE can have some vagueness. +This means that parts of it can be a `*`, which means when you match that CPE to a CVE it can match any of that part of the CPE. If the CPE is too vague, the extreme case being all parts are `*`, all CVEs will match. You want a very specific CPE, but not too specific that a small error would make it not match a CVE (false negative). Let's look into some examples of this stage. -#### tmux +##### Example: tmux tmux is a Unix terminal utility to multiplex ttys. It appears listed like this in macOS: @@ -280,6 +280,64 @@ Now things start to get slightly more tricky. We have to remove the `.app` suffi These are two illustrative examples. The reality is that there is no map or list of all the software available and how it's presented in each platform, so the "software to CPE" translation process is going to be evolving constantly. +#### Improving accuracy + +In order to improve the accuracy of matching software to CPEs, CPE translation rules are added for known cases where matching fails. +`server/vulnerabilities/cpe_translations.json` contains these rules and is included in the [NVD release](https://github.com/fleetdm/nvd/releases/latest). + +##### Example: `ruby@2.7` installed via `homebrew` + +The following CPE translation rule is used to reduce false positives when ruby is installed via homebrew. +This is needed because ruby is commonly included in the title in the CPE database. +This rule matches software whose name matches the `ruby` regular expression pattern and that was installed using `homebrew`. +When searching for CPEs, the specified `product` and `vendor` will be added to the filter criteria. 
+ +``` +[ + { + "software": { + "name": ["/^ruby(@.*)?$/"], + "source": ["homebrew_packages"] + }, + "filter": { + "product": ["ruby"], + "vendor": ["ruby-lang"] + } + } +] +``` + +##### CPE Translations (array[CPE Translation Entry]) + +##### CPE Translation Entry (object) + +The CPE translation rule. + +| Name | Type | Description | +| --- | --- | --- | +| `software` | CPE Translation Software | The CPE translation software match criteria. | +| `filter` | CPE Translation | The CPE translation. | + +##### CPE Translation Software (object) + +The CPE translation software match criteria. Used to match software collected from hosts. Fields are AND'd together. Values inside each field are OR'd together. + +| Name | Type | Description | +| --- | --- | --- | +| `name` | array[string] | The software name to match. Enclose within `/` to specify a regular expression pattern. | +| `bundle_identifier` | array[string] | The software bundle identifier (macOS apps only) to match. Enclose within `/` to specify a regular expression pattern. | +| `source` | array[string] | The software source to match. Enclose within `/` to specify a regular expression pattern. | + +##### CPE Translation (object) + +The CPE translation. Used to match CPEs in the CPE database. Fields are AND'd together. Values inside each field are OR'd together. + +| Name | Type | Description | +| --- | --- | --- | +| `product` | array[string] | The CPE product. | +| `vendor` | array[string] | The CPE vendor. | +| `target_sw` | array[string] | The CPE target software. | + ### Matching a CPE to a CVE Once we have a good CPE, we can match it against the CVE database. We download the data streams locally and match each CPE to the whole list. The matching is done using the [nvdtools implementation](https://github.com/facebookincubator/nvdtools). 
diff --git a/server/config/config.go b/server/config/config.go index b405e2001..807a2e595 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -276,6 +276,7 @@ type VulnerabilitiesConfig struct { DatabasesPath string `json:"databases_path" yaml:"databases_path"` Periodicity time.Duration `json:"periodicity" yaml:"periodicity"` CPEDatabaseURL string `json:"cpe_database_url" yaml:"cpe_database_url"` + CPETranslationsURL string `json:"cpe_translations_url" yaml:"cpe_translations_url"` CVEFeedPrefixURL string `json:"cve_feed_prefix_url" yaml:"cve_feed_prefix_url"` CurrentInstanceChecks string `json:"current_instance_checks" yaml:"current_instance_checks"` DisableDataSync bool `json:"disable_data_sync" yaml:"disable_data_sync"` @@ -641,7 +642,9 @@ func (man Manager) addConfigs() { man.addConfigDuration("vulnerabilities.periodicity", 1*time.Hour, "How much time to wait between processing software for vulnerabilities.") man.addConfigString("vulnerabilities.cpe_database_url", "", - "URL from which to get the latest CPE database. If empty, defaults to the official Github link.") + "URL from which to get the latest CPE database. If empty, it will be downloaded from the latest release available at https://github.com/fleetdm/nvd/releases.") + man.addConfigString("vulnerabilities.cpe_translations_url", "", + "URL from which to get the latest CPE translations. If empty, it will be downloaded from the latest release available at https://github.com/fleetdm/nvd/releases.") man.addConfigString("vulnerabilities.cve_feed_prefix_url", "", "Prefix URL for the CVE data feed. 
If empty, default to https://nvd.nist.gov/") man.addConfigString("vulnerabilities.current_instance_checks", "auto", @@ -855,6 +858,7 @@ func (man Manager) LoadConfig() FleetConfig { DatabasesPath: man.getConfigString("vulnerabilities.databases_path"), Periodicity: man.getConfigDuration("vulnerabilities.periodicity"), CPEDatabaseURL: man.getConfigString("vulnerabilities.cpe_database_url"), + CPETranslationsURL: man.getConfigString("vulnerabilities.cpe_translations_url"), CVEFeedPrefixURL: man.getConfigString("vulnerabilities.cve_feed_prefix_url"), CurrentInstanceChecks: man.getConfigString("vulnerabilities.current_instance_checks"), DisableDataSync: man.getConfigBool("vulnerabilities.disable_data_sync"), diff --git a/server/config/config_test.go b/server/config/config_test.go index ffb109025..a0c0cbfef 100644 --- a/server/config/config_test.go +++ b/server/config/config_test.go @@ -79,7 +79,8 @@ func TestConfigRoundtrip(t *testing.T) { require.Nil(t, err) // Ensure the read config is the same as the original - assert.Equal(t, *original, man.LoadConfig()) + actual := man.LoadConfig() + assert.Equal(t, *original, actual) } func TestConfigOsqueryAsync(t *testing.T) { diff --git a/server/fleet/app.go b/server/fleet/app.go index 0021d9f57..e7f103ca8 100644 --- a/server/fleet/app.go +++ b/server/fleet/app.go @@ -488,6 +488,7 @@ type VulnerabilitiesConfig struct { DatabasesPath string `json:"databases_path"` Periodicity time.Duration `json:"periodicity"` CPEDatabaseURL string `json:"cpe_database_url"` + CPETranslationsURL string `json:"cpe_translations_url"` CVEFeedPrefixURL string `json:"cve_feed_prefix_url"` CurrentInstanceChecks string `json:"current_instance_checks"` DisableDataSync bool `json:"disable_data_sync"` diff --git a/server/service/service_appconfig.go b/server/service/service_appconfig.go index 51c424e3e..fb7a5a1a8 100644 --- a/server/service/service_appconfig.go +++ b/server/service/service_appconfig.go @@ -121,6 +121,7 @@ func (svc *Service) 
VulnerabilitiesConfig(ctx context.Context) (*fleet.Vulnerabi DatabasesPath: svc.config.Vulnerabilities.DatabasesPath, Periodicity: svc.config.Vulnerabilities.Periodicity, CPEDatabaseURL: svc.config.Vulnerabilities.CPEDatabaseURL, + CPETranslationsURL: svc.config.Vulnerabilities.CPETranslationsURL, CVEFeedPrefixURL: svc.config.Vulnerabilities.CVEFeedPrefixURL, CurrentInstanceChecks: svc.config.Vulnerabilities.CurrentInstanceChecks, DisableDataSync: svc.config.Vulnerabilities.DisableDataSync, diff --git a/server/vulnerabilities/cpe.go b/server/vulnerabilities/cpe.go index abf93abdc..48e8c9fb7 100644 --- a/server/vulnerabilities/cpe.go +++ b/server/vulnerabilities/cpe.go @@ -2,6 +2,7 @@ package vulnerabilities import ( "context" + "encoding/json" "errors" "fmt" "net/http" @@ -10,8 +11,8 @@ import ( "path/filepath" "regexp" "strings" - "time" + "github.com/doug-martin/goqu/v9" "github.com/fleetdm/fleet/v4/pkg/download" "github.com/fleetdm/fleet/v4/server/contexts/ctxerr" "github.com/fleetdm/fleet/v4/server/fleet" @@ -27,15 +28,7 @@ const ( repo = "nvd" ) -type NVDRelease struct { - Etag string - CreatedAt time.Time - CPEURL string -} - -var cpeSqliteRegex = regexp.MustCompile(`^cpe-.*\.sqlite\.gz$`) - -func GetLatestNVDRelease(client *http.Client) (*NVDRelease, error) { +func GetLatestNVDRelease(client *http.Client) (*github.RepositoryRelease, error) { ghclient := github.NewClient(client) ctx := context.Background() releases, _, err := ghclient.Repositories.ListReleases(ctx, owner, repo, &github.ListOptions{Page: 0, PerPage: 10}) @@ -43,84 +36,64 @@ func GetLatestNVDRelease(client *http.Client) (*NVDRelease, error) { return nil, err } - if len(releases) == 0 { - return nil, nil - } - - cpeURL := "" - - // TODO: get not draft release - - for _, asset := range releases[0].Assets { - if asset != nil { - matched := cpeSqliteRegex.MatchString(asset.GetName()) - if !matched { - continue - } - cpeURL = asset.GetBrowserDownloadURL() + for _, release := range releases { + // 
skip draft releases + if !release.GetDraft() { + return release, nil } } - return &NVDRelease{ - Etag: releases[0].GetName(), - CreatedAt: releases[0].GetCreatedAt().Time, - CPEURL: cpeURL, - }, nil + return nil, errors.New("no nvd release found") } -type syncOpts struct { - url string -} +const cpeDBFilename = "cpe.sqlite" -type CPESyncOption func(*syncOpts) +var cpeDBRegex = regexp.MustCompile(`^cpe-.*\.sqlite\.gz$`) -func WithCPEURL(url string) CPESyncOption { - return func(o *syncOpts) { - o.url = url - } -} - -const cpeDatabaseFilename = "cpe.sqlite" - -// DownloadCPEDatabase downloads the CPE database from the -// latest release of github.com/fleetdm/nvd to the given dbPath. -// An alternative URL can be set via the WithCPEURL option. -// -// It won't download the database if the database has already been downloaded and -// has an mtime after the release date. -func DownloadCPEDatabase( +// DownloadCPEDB downloads the CPE database to the given vulnPath. If cpeDBURL is empty, attempts to download it +// from the latest release of github.com/fleetdm/nvd. Skips downloading if CPE database is newer than the release. 
+func DownloadCPEDB( vulnPath string, client *http.Client, - opts ...CPESyncOption, + cpeDBURL string, ) error { - var o syncOpts - for _, fn := range opts { - fn(&o) - } + path := filepath.Join(vulnPath, cpeDBFilename) - dbPath := filepath.Join(vulnPath, cpeDatabaseFilename) - - if o.url == "" { - nvdRelease, err := GetLatestNVDRelease(client) + if cpeDBURL == "" { + release, err := GetLatestNVDRelease(client) if err != nil { return err } - stat, err := os.Stat(dbPath) - if err != nil { - if !errors.Is(err, os.ErrNotExist) { - return err + stat, err := os.Stat(path) + switch { + case errors.Is(err, os.ErrNotExist): + // okay + case err != nil: + return err + default: + if stat.ModTime().After(release.CreatedAt.Time) { + // file is newer than release, do nothing + return nil } - } else if !nvdRelease.CreatedAt.After(stat.ModTime()) { - return nil } - o.url = nvdRelease.CPEURL + + for _, asset := range release.Assets { + if cpeDBRegex.MatchString(asset.GetName()) { + cpeDBURL = asset.GetBrowserDownloadURL() + break + } + } + if cpeDBURL == "" { + return errors.New("failed to find cpe database in nvd release") + + } } - u, err := url.Parse(o.url) + u, err := url.Parse(cpeDBURL) if err != nil { return err } - if err := download.DownloadAndExtract(client, u, dbPath); err != nil { + if err := download.DownloadAndExtract(client, u, path); err != nil { return err } @@ -130,6 +103,8 @@ func DownloadCPEDatabase( type IndexedCPEItem struct { ID int `json:"id" db:"rowid"` Title string `json:"title" db:"title"` + Product string `json:"product" db:"product"` + Vendor string `json:"vendor" db:"vendor"` Version *string `json:"version" db:"version"` TargetSW *string `json:"target_sw" db:"target_sw"` CPE23 string `json:"cpe23" db:"cpe23"` @@ -140,67 +115,387 @@ func cleanAppName(appName string) string { return strings.TrimSuffix(appName, ".app") } -var onlyAlphaNumeric = regexp.MustCompile("[^a-zA-Z0-9]+") +var nonAlphaNumeric = regexp.MustCompile(`[^a-zA-Z0-9]+`) -func 
CPEFromSoftware(db *sqlx.DB, software *fleet.Software) (string, error) { - targetSW := "" - switch software.Source { - case "apps": - targetSW = "macos" - case "python_packages": - targetSW = "python" - case "chrome_extensions": - targetSW = "chrome" - case "firefox_addons": - targetSW = "firefox" - case "safari_extensions": - targetSW = "safari" - case "deb_packages": - case "portage_packages": - case "rpm_packages": - case "npm_packages": - targetSW = `"node.js"` - case "atom_packages": - case "programs": - targetSW = `"windows*"` - case "ie_extensions": - case "chocolatey_packages": - } +// sanitizeMatch sanitizes the search string for sqlite fts queries. Replaces all non alpha numeric characters with spaces. +func sanitizeMatch(s string) string { + return nonAlphaNumeric.ReplaceAllString(s, " ") +} - checkTargetSW := "" - args := []interface{}{onlyAlphaNumeric.ReplaceAllString(cleanAppName(software.Name), " ")} - if targetSW != "" { - checkTargetSW = " AND target_sw MATCH ?" - args = append(args, targetSW) - } - args = append(args, software.Version) +var sanitizeVersionRe = regexp.MustCompile(`[^a-zA-Z0-9_-]+`) - query := fmt.Sprintf( - `SELECT rowid, * FROM cpe WHERE rowid in ( - SELECT rowid FROM cpe_search WHERE title MATCH ?%s - ) and version=? order by deprecated asc`, - checkTargetSW, - ) - var indexedCPEs []IndexedCPEItem - err := db.Select(&indexedCPEs, query, args...) +// sanitizeVersion attempts to sanitize versions and attempt to make it dot separated. +// Eg Zoom reports version as "5.11.1 (8356)". In the NVD CPE dictionary it should be 5.11.1.8356. 
+func sanitizeVersion(version string) string { + parts := sanitizeVersionRe.Split(version, -1) + return strings.Trim(strings.Join(parts, "."), ".") +} + +const cpeTranslationsFilename = "cpe_translations.json" + +func loadCPETranslations(path string) (CPETranslations, error) { + f, err := os.Open(path) if err != nil { - return "", fmt.Errorf("getting cpes for: %s: %w", cleanAppName(software.Name), err) + return nil, err + } + defer f.Close() + + var translations CPETranslations + if err := json.NewDecoder(f).Decode(&translations); err != nil { + return nil, fmt.Errorf("decode json: %w", err) } + return translations, nil +} + +// DownloadCPETranslations downloads the CPE translations to the given vulnPath. If cpeTranslationsURL is empty, attempts to download it +// from the latest release of github.com/fleetdm/nvd. Skips downloading if CPE translations is newer than the release. +func DownloadCPETranslations(vulnPath string, client *http.Client, cpeTranslationsURL string) error { + path := filepath.Join(vulnPath, cpeTranslationsFilename) + + if cpeTranslationsURL == "" { + release, err := GetLatestNVDRelease(client) + if err != nil { + return err + } + stat, err := os.Stat(path) + switch { + case errors.Is(err, os.ErrNotExist): + // okay + case err != nil: + return err + default: + if stat.ModTime().After(release.CreatedAt.Time) { + // file is newer than release, do nothing + return nil + } + } + + for _, asset := range release.Assets { + if cpeTranslationsFilename == asset.GetName() { + cpeTranslationsURL = asset.GetBrowserDownloadURL() + break + } + } + if cpeTranslationsURL == "" { + return errors.New("failed to find cpe translations in nvd release") + + } + } + + u, err := url.Parse(cpeTranslationsURL) + if err != nil { + return err + } + if err := download.Download(client, u, path); err != nil { + return err + } + + return nil +} + +// regexpCache caches compiled regular expressions. Not safe for concurrent use. 
+type regexpCache struct { + re map[string]*regexp.Regexp +} + +func newRegexpCache() *regexpCache { + return &regexpCache{re: make(map[string]*regexp.Regexp)} +} + +func (r *regexpCache) Get(pattern string) (*regexp.Regexp, error) { + if re, ok := r.re[pattern]; ok { + return re, nil + } + + re, err := regexp.Compile(pattern) + if err != nil { + return nil, err + } + r.re[pattern] = re + return re, nil +} + +// CPETranslations includes special case translations for software that fails to match entries in the NVD CPE Dictionary +// using the standard logic. This may be due to unexpected vendor or product names. +// +// Example: +// +// [ +// { +// "software": { +// "bundle_identifier": ["com.1password.1password"] +// }, +// "filter": { +// "product": ["1password"], +// "vendor": ["agilebits"] +// } +// } +// ] +type CPETranslations []CPETranslationItem + +func (c CPETranslations) Translate(reCache *regexpCache, s *fleet.Software) (CPETranslation, bool, error) { + for _, item := range c { + match, err := item.Software.Matches(reCache, s) + if err != nil { + return CPETranslation{}, false, err + } + if match { + return item.Filter, true, nil + } + } + + return CPETranslation{}, false, nil +} + +type CPETranslationItem struct { + Software CPETranslationSoftware `json:"software"` + Filter CPETranslation `json:"filter"` +} + +// CPETranslationSoftware represents software match criteria for cpe translations. +type CPETranslationSoftware struct { + Name []string `json:"name"` + BundleIdentifier []string `json:"bundle_identifier"` + Source []string `json:"source"` +} + +// Matches returns true if the software satisfies all the match criteria. 
+func (c CPETranslationSoftware) Matches(reCache *regexpCache, s *fleet.Software) (bool, error) { + matches := func(a, b string) (bool, error) { + // check if its a regular expression enclosed in '/' + if len(a) > 2 && a[0] == '/' && a[len(a)-1] == '/' { + pattern := a[1 : len(a)-1] + re, err := reCache.Get(pattern) + if err != nil { + return false, err + } + return re.MatchString(b), nil + } + return a == b, nil + } + + if len(c.Name) > 0 { + found := false + for _, name := range c.Name { + match, err := matches(name, s.Name) + if err != nil { + return false, err + } + if match { + found = true + break + } + } + if !found { + return false, nil + } + } + if len(c.BundleIdentifier) > 0 { + found := false + for _, bundleID := range c.BundleIdentifier { + match, err := matches(bundleID, s.BundleIdentifier) + if err != nil { + return false, err + } + if match { + found = true + break + } + } + if !found { + return false, nil + } + } + if len(c.Source) > 0 { + found := false + for _, source := range c.Source { + match, err := matches(source, s.Source) + if err != nil { + return false, err + } + if match { + found = true + break + } + } + if !found { + return false, nil + } + } + return true, nil +} + +type CPETranslation struct { + Product []string `json:"product"` + Vendor []string `json:"vendor"` + TargetSW []string `json:"target_sw"` +} + +// CPEFromSoftware attempts to find a matching cpe entry for the given software in the NVD CPE dictionary. `db` contains data from the NVD CPE dictionary +// and is optimized for lookups, see `GenerateCPEDB`. `translations` are used to aid in cpe matching. When searching for cpes, we first check if it matches +// any translations, and then lookup in the cpe database based on the title, product, vendor, target_sw, and version. 
+func CPEFromSoftware(db *sqlx.DB, software *fleet.Software, translations CPETranslations, reCache *regexpCache) (string, error) { + version := sanitizeVersion(software.Version) + + ds := goqu.Dialect("sqlite").From(goqu.I("cpe_2").As("c")). + Select( + "c.rowid", + "c.title", + "c.product", + "c.vendor", + "c.version", + "c.target_sw", + "c.cpe23", + "c.deprecated", + ). + Join( + goqu.I("cpe_search").As("cs"), + goqu.On(goqu.I("cs.rowid").Eq(goqu.I("c.rowid"))), + ). + Where( + goqu.I("c.version").Eq(version), + ) + + translation, match, err := translations.Translate(reCache, software) + if err != nil { + return "", fmt.Errorf("translate software: %w", err) + } + if match { + if len(translation.Product) > 0 { + var exps []goqu.Expression + for _, product := range translation.Product { + exps = append(exps, goqu.I("c.product").Eq(product)) + } + ds = ds.Where(goqu.Or(exps...)) + } + if len(translation.Vendor) > 0 { + var exps []goqu.Expression + for _, vendor := range translation.Vendor { + exps = append(exps, goqu.I("c.vendor").Eq(vendor)) + } + ds = ds.Where(goqu.Or(exps...)) + } + if len(translation.TargetSW) > 0 { + var exps []goqu.Expression + for _, targetSW := range translation.TargetSW { + exps = append(exps, goqu.I("c.target_sw").Eq(targetSW)) + } + ds = ds.Where(goqu.Or(exps...)) + } + } else { + + name := software.Name + var targetSW string + + switch software.Source { + case "apps": + name = cleanAppName(software.Name) + + // match on bundle identifier to reduce false positives for software with short names eg notes, + // printer, calculator. + // match the following target_sw + // - mac + // - mac_os + // - mac_os_x + // - macos + ds = ds.Where( + goqu.L("? 
LIKE '%' || c.vendor || '%'", software.BundleIdentifier), + goqu.Or( + goqu.I("c.target_sw").Eq(""), + goqu.I("c.target_sw").Like("mac%"), + ), + ) + case "python_packages": + targetSW = "python" + case "chrome_extensions": + targetSW = "chrome" + case "firefox_addons": + targetSW = "firefox" + case "safari_extensions": + targetSW = "safari" + case "npm_packages": + targetSW = `node.js` + case "programs": + + // match the following target_sw + // - windows + // - windows_10 + // - windows_7 + // - windows_8 + // - windows_8.1 + // - windows_ce + // - windows_communication_foundation + // - windows_integrated_security + // - windows_mobile + // - windows_phone + // - windows_server + // - windows_server_2003 + // - windows_server_2008 + // - windows_vista + // - windows_xp + ds = ds.Where( + goqu.Or( + goqu.I("c.target_sw").Like("windows%"), + ), + ) + } + if targetSW != "" { + ds = ds.Where( + goqu.L("c.target_sw").Eq(targetSW), + ) + } + + // sanitize name for full text search on title + nameTerms := sanitizeMatch(name) + ds = ds.Where( + goqu.L("cs.title MATCH ?", nameTerms), + ) + } + + sql, args, err := ds.ToSQL() + if err != nil { + return "", fmt.Errorf("sql: %w", err) + } + + var indexedCPEs []IndexedCPEItem + err = db.Select(&indexedCPEs, sql, args...) + if err != nil { + return "", fmt.Errorf("getting cpes for: %s: %w", software.Name, err) + } + + // if there are any non-deprecated cpes, return the first one for _, item := range indexedCPEs { if !item.Deprecated { return item.CPE23, nil } + } + // try to find a non-deprecated cpe by looking up deprecated_by + for _, item := range indexedCPEs { deprecatedItem := item for { var deprecation IndexedCPEItem err = db.Get( &deprecation, - `SELECT rowid, * FROM cpe c WHERE cpe23 in ( - SELECT cpe23 from deprecated_by d where d.cpe_id=? 
- )`, + ` +SELECT + rowid, + title, + product, + vendor, + version, + target_sw, + cpe23, + deprecated +FROM + cpe_2 +WHERE + cpe23 IN ( + SELECT cpe23 FROM deprecated_by d WHERE d.cpe_id = ? + ) +`, deprecatedItem.ID, ) if err != nil { @@ -224,7 +519,7 @@ func TranslateSoftwareToCPE( vulnPath string, logger kitlog.Logger, ) error { - dbPath := filepath.Join(vulnPath, cpeDatabaseFilename) + dbPath := filepath.Join(vulnPath, cpeDBFilename) // Skip software from platforms for which we will be using OVAL for vulnerability detection. iterator, err := ds.AllSoftwareWithoutCPEIterator(ctx, oval.SupportedHostPlatforms) @@ -239,12 +534,20 @@ func TranslateSoftwareToCPE( } defer db.Close() + cpeTranslationsPath := filepath.Join(vulnPath, cpeTranslationsFilename) + cpeTranslations, err := loadCPETranslations(cpeTranslationsPath) + if err != nil { + level.Error(logger).Log("msg", "failed to load cpe translations", "err", err) + } + + reCache := newRegexpCache() + for iterator.Next() { software, err := iterator.Value() if err != nil { return ctxerr.Wrap(ctx, err, "getting value from iterator") } - cpe, err := CPEFromSoftware(db, software) + cpe, err := CPEFromSoftware(db, software, cpeTranslations, reCache) if err != nil { level.Error(logger).Log("software->cpe", "error translating to CPE, skipping...", "err", err) continue diff --git a/server/vulnerabilities/cpe_test.go b/server/vulnerabilities/cpe_test.go index a4f97e712..d1cb97e3b 100644 --- a/server/vulnerabilities/cpe_test.go +++ b/server/vulnerabilities/cpe_test.go @@ -36,22 +36,120 @@ func TestCPEFromSoftware(t *testing.T) { db, err := sqliteDB(dbPath) require.NoError(t, err) + reCache := newRegexpCache() + // checking an non existent version returns empty - cpe, err := CPEFromSoftware(db, &fleet.Software{Name: "Vendor Product-1.app", Version: "2.3.4", Source: "apps"}) + cpe, err := CPEFromSoftware(db, &fleet.Software{Name: "Vendor Product-1.app", Version: "2.3.4", BundleIdentifier: "vendor", Source: "apps"}, nil, 
reCache) require.NoError(t, err) require.Equal(t, "", cpe) // checking a version that exists works - cpe, err = CPEFromSoftware(db, &fleet.Software{Name: "Vendor Product-1.app", Version: "1.2.3", Source: "apps"}) + cpe, err = CPEFromSoftware(db, &fleet.Software{Name: "Vendor Product-1.app", Version: "1.2.3", BundleIdentifier: "vendor", Source: "apps"}, nil, reCache) require.NoError(t, err) require.Equal(t, "cpe:2.3:a:vendor:product-1:1.2.3:*:*:*:*:macos:*:*", cpe) // follows many deprecations - cpe, err = CPEFromSoftware(db, &fleet.Software{Name: "Vendor2 Product2.app", Version: "0.3", Source: "apps"}) + cpe, err = CPEFromSoftware(db, &fleet.Software{Name: "Vendor2 Product2.app", Version: "0.3", BundleIdentifier: "vendor2", Source: "apps"}, nil, reCache) require.NoError(t, err) require.Equal(t, "cpe:2.3:a:vendor2:product4:999:*:*:*:*:macos:*:*", cpe) } +func TestCPETranslations(t *testing.T) { + tempDir := t.TempDir() + + items, err := cpedict.Decode(strings.NewReader(XmlCPETestDict)) + require.NoError(t, err) + + dbPath := filepath.Join(tempDir, "cpe.sqlite") + + err = GenerateCPEDB(dbPath, items) + require.NoError(t, err) + + db, err := sqliteDB(dbPath) + require.NoError(t, err) + + tt := []struct { + Name string + Translations CPETranslations + Software *fleet.Software + Expected string + }{ + { + Name: "simple match", + Translations: CPETranslations{ + { + Software: CPETranslationSoftware{ + Name: []string{"X"}, + Source: []string{"apps"}, + }, + Filter: CPETranslation{ + Product: []string{"product-1"}, + Vendor: []string{"vendor"}, + }, + }, + }, + Software: &fleet.Software{ + Name: "X", + Version: "1.2.3", + Source: "apps", + }, + Expected: "cpe:2.3:a:vendor:product-1:1.2.3:*:*:*:*:macos:*:*", + }, + { + Name: "match name or", + Translations: CPETranslations{ + { + Software: CPETranslationSoftware{ + Name: []string{"X", "Y"}, + Source: []string{"apps"}, + }, + Filter: CPETranslation{ + Product: []string{"product-1"}, + Vendor: []string{"vendor"}, + }, + }, + 
}, + Software: &fleet.Software{ + Name: "Y", + Version: "1.2.3", + Source: "apps", + }, + Expected: "cpe:2.3:a:vendor:product-1:1.2.3:*:*:*:*:macos:*:*", + }, + { + Name: "match name regexp", + Translations: CPETranslations{ + { + Software: CPETranslationSoftware{ + Name: []string{"/^[A-Z]$/"}, + Source: []string{"apps"}, + }, + Filter: CPETranslation{ + Product: []string{"product-1"}, + Vendor: []string{"vendor"}, + }, + }, + }, + Software: &fleet.Software{ + Name: "Z", + Version: "1.2.3", + Source: "apps", + }, + Expected: "cpe:2.3:a:vendor:product-1:1.2.3:*:*:*:*:macos:*:*", + }, + } + + reCache := newRegexpCache() + + for _, tc := range tt { + t.Run(tc.Name, func(t *testing.T) { + cpe, err := CPEFromSoftware(db, tc.Software, tc.Translations, reCache) + require.NoError(t, err) + require.Equal(t, tc.Expected, cpe) + }) + } +} + func TestSyncCPEDatabase(t *testing.T) { nettest.Run(t) @@ -60,7 +158,7 @@ func TestSyncCPEDatabase(t *testing.T) { tempDir := t.TempDir() // first time, db doesn't exist, so it downloads - err := DownloadCPEDatabase(tempDir, client) + err := DownloadCPEDB(tempDir, client, "") require.NoError(t, err) dbPath := filepath.Join(tempDir, "cpe.sqlite") @@ -68,23 +166,29 @@ func TestSyncCPEDatabase(t *testing.T) { require.NoError(t, err) // and this works afterwards - software := &fleet.Software{Name: "1Password.app", Version: "7.2.3", Source: "apps"} - cpe, err := CPEFromSoftware(db, software) + reCache := newRegexpCache() + + software := &fleet.Software{Name: "1Password.app", + Version: "7.2.3", + BundleIdentifier: "com.1password.1password", + Source: "apps", + } + cpe, err := CPEFromSoftware(db, software, nil, reCache) require.NoError(t, err) require.Equal(t, "cpe:2.3:a:1password:1password:7.2.3:beta0:*:*:*:macos:*:*", cpe) - npmCPE, err := CPEFromSoftware(db, &fleet.Software{Name: "Adaltas Mixme 0.4.0 for Node.js", Version: "0.4.0", Source: "npm_packages"}) + npmCPE, err := CPEFromSoftware(db, &fleet.Software{Name: "Adaltas Mixme 0.4.0 for 
Node.js", Version: "0.4.0", Source: "npm_packages"}, nil, reCache) require.NoError(t, err) assert.Equal(t, "cpe:2.3:a:adaltas:mixme:0.4.0:*:*:*:*:node.js:*:*", npmCPE) - windowsCPE, err := CPEFromSoftware(db, &fleet.Software{Name: "HP Storage Data Protector 8.0 for Windows 8", Version: "8.0", Source: "programs"}) + windowsCPE, err := CPEFromSoftware(db, &fleet.Software{Name: "HP Storage Data Protector 8.0 for Windows 8", Version: "8.0", Source: "programs"}, nil, reCache) require.NoError(t, err) assert.Equal(t, "cpe:2.3:a:hp:storage_data_protector:8.0:-:*:*:*:windows_7:*:*", windowsCPE) // but now we truncate to make sure searching for cpe fails err = os.Truncate(dbPath, 0) require.NoError(t, err) - _, err = CPEFromSoftware(db, software) + _, err = CPEFromSoftware(db, software, nil, reCache) require.Error(t, err) // and we make the db older than the release @@ -93,7 +197,7 @@ func TestSyncCPEDatabase(t *testing.T) { require.NoError(t, err) // then it will download - err = DownloadCPEDatabase(tempDir, client) + err = DownloadCPEDB(tempDir, client, "") require.NoError(t, err) // let's register the mtime for the db @@ -106,7 +210,7 @@ func TestSyncCPEDatabase(t *testing.T) { require.NoError(t, err) defer db.Close() - cpe, err = CPEFromSoftware(db, software) + cpe, err = CPEFromSoftware(db, software, nil, reCache) require.NoError(t, err) require.Equal(t, "cpe:2.3:a:1password:1password:7.2.3:beta0:*:*:*:macos:*:*", cpe) @@ -114,7 +218,7 @@ func TestSyncCPEDatabase(t *testing.T) { time.Sleep(2 * time.Second) // let's check it doesn't download because it's new enough - err = DownloadCPEDatabase(tempDir, client) + err = DownloadCPEDB(tempDir, client, "") require.NoError(t, err) stat, err = os.Stat(dbPath) require.NoError(t, err) @@ -157,16 +261,18 @@ func TestTranslateSoftwareToCPE(t *testing.T) { iterator := &fakeSoftwareIterator{ softwares: []*fleet.Software{ { - ID: 1, - Name: "Product", - Version: "1.2.3", - Source: "apps", + ID: 1, + Name: "Product", + Version: 
"1.2.3", + BundleIdentifier: "vendor", + Source: "apps", }, { - ID: 2, - Name: "Product2", - Version: "0.3", - Source: "apps", + ID: 2, + Name: "Product2", + Version: "0.3", + BundleIdentifier: "vendor2", + Source: "apps", }, }, } @@ -204,7 +310,7 @@ func TestSyncsCPEFromURL(t *testing.T) { client := fleethttp.NewClient() tempDir := t.TempDir() - err := DownloadCPEDatabase(tempDir, client, WithCPEURL(ts.URL+"/hello-world.gz")) + err := DownloadCPEDB(tempDir, client, ts.URL+"/hello-world.gz") require.NoError(t, err) dbPath := filepath.Join(tempDir, "cpe.sqlite") @@ -212,3 +318,39 @@ func TestSyncsCPEFromURL(t *testing.T) { require.NoError(t, err) assert.Equal(t, "Hello world!", string(stored)) } + +func TestLegacyCPEDB(t *testing.T) { + + // Older versions of fleet used "select * ..." when querying from the cpe and cpe_search tables + // Ensure that this still works when generating the new cpe database. + type IndexedCPEItem struct { + ID int `json:"id" db:"rowid"` + Title string `json:"title" db:"title"` + Version *string `json:"version" db:"version"` + TargetSW *string `json:"target_sw" db:"target_sw"` + CPE23 string `json:"cpe23" db:"cpe23"` + Deprecated bool `json:"deprecated" db:"deprecated"` + } + tempDir := t.TempDir() + + items, err := cpedict.Decode(strings.NewReader(XmlCPETestDict)) + require.NoError(t, err) + + dbPath := filepath.Join(tempDir, "cpe.sqlite") + + err = GenerateCPEDB(dbPath, items) + require.NoError(t, err) + + db, err := sqliteDB(dbPath) + require.NoError(t, err) + + query := `SELECT rowid, * FROM cpe WHERE rowid in ( + SELECT rowid FROM cpe_search WHERE title MATCH ? + ) and version = ? 
order by deprecated asc` + + var indexedCPEs []IndexedCPEItem + err = db.Select(&indexedCPEs, query, "product", "1.2.3") + require.NoError(t, err) + + require.Len(t, indexedCPEs, 1) +} diff --git a/server/vulnerabilities/cpe_translations.json b/server/vulnerabilities/cpe_translations.json new file mode 100644 index 000000000..9df5ee0e7 --- /dev/null +++ b/server/vulnerabilities/cpe_translations.json @@ -0,0 +1,33 @@ +[ + { + "software": { + "bundle_identifier": ["us.zoom.xos"], + "source": ["apps"] + }, + "filter": { + "product": ["zoom", "meetings"], + "vendor": ["zoom"], + "target_sw": ["macos", "mac_os"] + } + }, + { + "software": { + "name": ["/^ruby(@.*)?$/"], + "source": ["homebrew_packages"] + }, + "filter": { + "product": ["ruby"], + "vendor": ["ruby-lang"] + } + }, + { + "software": { + "name": ["/^node(@.*)?$/"], + "source": ["homebrew_packages"] + }, + "filter": { + "product": ["node.js"], + "vendor": ["nodejs"] + } + } +] diff --git a/server/vulnerabilities/db.go b/server/vulnerabilities/db.go index a558fee08..d59d47206 100644 --- a/server/vulnerabilities/db.go +++ b/server/vulnerabilities/db.go @@ -21,25 +21,41 @@ func sqliteDB(dbPath string) (*sqlx.DB, error) { } func applyCPEDatabaseSchema(db *sqlx.DB) error { + // Use a new table cpe_2 containing new columns vendor, product. view cpe used for backwards compatibility + // with old fleet versions that use "select * from cpe ...". 
When creating the view, we need to + // select rowid because it is used for joins between the cpe and cpe_search tables _, err := db.Exec(` - CREATE TABLE IF NOT EXISTS cpe ( - cpe23 TEXT NOT NULL, - title TEXT NOT NULL, - version TEXT, - target_sw TEXT, - deprecated BOOLEAN DEFAULT FALSE - ); - CREATE TABLE IF NOT EXISTS deprecated_by ( - cpe_id INTEGER, - cpe23 TEXT NOT NULL, - FOREIGN KEY(cpe_id) REFERENCES cpe(rowid) - ); - CREATE VIRTUAL TABLE IF NOT EXISTS cpe_search USING fts5(title, target_sw); - CREATE INDEX IF NOT EXISTS idx_version ON cpe (version); - CREATE INDEX IF NOT EXISTS idx_cpe23 ON cpe (cpe23); - CREATE INDEX IF NOT EXISTS idx_target_sw ON cpe (target_sw); - CREATE INDEX IF NOT EXISTS idx_deprecated_by ON deprecated_by (cpe23); - `) +CREATE TABLE IF NOT EXISTS cpe_2 ( + cpe23 TEXT NOT NULL, + title TEXT NOT NULL, + vendor TEXT, + product TEXT, + version TEXT, + target_sw TEXT, + deprecated BOOLEAN DEFAULT FALSE +); +CREATE VIEW IF NOT EXISTS cpe AS +SELECT + rowid, + cpe23, + title, + version, + target_sw, + deprecated +FROM cpe_2; +CREATE TABLE IF NOT EXISTS deprecated_by ( + cpe_id INTEGER, + cpe23 TEXT NOT NULL, + FOREIGN KEY(cpe_id) REFERENCES cpe(rowid) +); +CREATE VIRTUAL TABLE IF NOT EXISTS cpe_search USING fts5(title, target_sw); +CREATE INDEX IF NOT EXISTS idx_cpe_2_cpe23 ON cpe_2 (cpe23); +CREATE INDEX IF NOT EXISTS idx_cpe_2_vendor ON cpe_2 (vendor); +CREATE INDEX IF NOT EXISTS idx_cpe_2_product ON cpe_2 (product); +CREATE INDEX IF NOT EXISTS idx_cpe_2_version ON cpe_2 (version); +CREATE INDEX IF NOT EXISTS idx_cpe_2_target_sw ON cpe_2 (target_sw); +CREATE INDEX IF NOT EXISTS idx_deprecated_by ON deprecated_by (cpe23); +`) return err } @@ -47,11 +63,14 @@ func generateCPEItem(item cpedict.CPEItem) ([]interface{}, map[string]string, er var cpes []interface{} deprecations := make(map[string]string) - targetSW := wfn.StripSlashes(item.CPE23.Name.TargetSW) - version := wfn.StripSlashes(item.CPE23.Name.Version) - title := 
item.Title["en-US"] cpe23 := wfn.Attributes(item.CPE23.Name).BindToFmtString() - cpes = append(cpes, cpe23, title, version, targetSW, item.Deprecated) + title := item.Title["en-US"] + vendor := wfn.StripSlashes(item.CPE23.Name.Vendor) + product := wfn.StripSlashes(item.CPE23.Name.Product) + version := wfn.StripSlashes(item.CPE23.Name.Version) + targetSW := wfn.StripSlashes(item.CPE23.Name.TargetSW) + + cpes = append(cpes, cpe23, title, vendor, product, version, targetSW, item.Deprecated) if item.CPE23.Deprecation != nil { for _, deprecatedBy := range item.CPE23.Deprecation.DeprecatedBy { @@ -82,54 +101,54 @@ func GenerateCPEDB(path string, items *cpedict.CPEList) error { } cpesCount := 0 - var allCPEs []interface{} + var cpesBatch []interface{} deprecationsCount := 0 - var allDeprecations []interface{} + var deprecationsBatch []interface{} for _, item := range items.Items { cpes, deprecations, err := generateCPEItem(item) if err != nil { return err } - allCPEs = append(allCPEs, cpes...) + cpesBatch = append(cpesBatch, cpes...) 
cpesCount++ if len(deprecations) > 0 { deprecationsCount++ } for key, val := range deprecations { - allDeprecations = append(allDeprecations, key, val) + deprecationsBatch = append(deprecationsBatch, key, val) } if cpesCount > batchSize { - err = bulkInsertCPEs(cpesCount, db, allCPEs) + err = bulkInsertCPEs(cpesCount, db, cpesBatch) if err != nil { return err } - allCPEs = []interface{}{} + cpesBatch = []interface{}{} cpesCount = 0 } if deprecationsCount > batchSize { - err := bulkInsertDeprecations(deprecationsCount, db, allDeprecations) + err := bulkInsertDeprecations(deprecationsCount, db, deprecationsBatch) if err != nil { return err } - allDeprecations = []interface{}{} + deprecationsBatch = []interface{}{} deprecationsCount = 0 } } if cpesCount > 0 { - err = bulkInsertCPEs(cpesCount, db, allCPEs) + err = bulkInsertCPEs(cpesCount, db, cpesBatch) if err != nil { return err } } if deprecationsCount > 0 { - err := bulkInsertDeprecations(deprecationsCount, db, allDeprecations) + err := bulkInsertDeprecations(deprecationsCount, db, deprecationsBatch) if err != nil { return err } } - _, err = db.Exec(`INSERT INTO cpe_search(rowid, title, target_sw) select rowid, title, target_sw from cpe`) + _, err = db.Exec(`INSERT INTO cpe_search (rowid, title, target_sw) select rowid, title, target_sw from cpe`) if err != nil { return err } @@ -137,7 +156,7 @@ func GenerateCPEDB(path string, items *cpedict.CPEList) error { } func bulkInsertDeprecations(deprecationsCount int, db *sqlx.DB, allDeprecations []interface{}) error { - values := strings.TrimSuffix(strings.Repeat("((SELECT rowid FROM CPE where cpe23=?), ?),", deprecationsCount), ",") + values := strings.TrimSuffix(strings.Repeat("((SELECT rowid FROM CPE where cpe23 = ?), ?),", deprecationsCount), ",") _, err := db.Exec( fmt.Sprintf(`INSERT INTO deprecated_by(cpe_id, cpe23) VALUES %s`, values), allDeprecations..., @@ -146,9 +165,19 @@ func bulkInsertDeprecations(deprecationsCount int, db *sqlx.DB, allDeprecations } func 
bulkInsertCPEs(cpesCount int, db *sqlx.DB, allCPEs []interface{}) error { - values := strings.TrimSuffix(strings.Repeat("(?, ?, ?, ?, ?),", cpesCount), ",") + values := strings.TrimSuffix(strings.Repeat("(?, ?, ?, ?, ?, ?, ?), ", cpesCount), ", ") _, err := db.Exec( - fmt.Sprintf(`INSERT INTO cpe(cpe23, title, version, target_sw, deprecated) VALUES %s`, values), + fmt.Sprintf(` +INSERT INTO cpe_2 ( + cpe23, + title, + vendor, + product, + version, + target_sw, + deprecated +) +VALUES %s`, values), allCPEs..., ) return err diff --git a/server/vulnerabilities/sync.go b/server/vulnerabilities/sync.go index 5ecdf5f21..3f2ba0a3c 100644 --- a/server/vulnerabilities/sync.go +++ b/server/vulnerabilities/sync.go @@ -24,23 +24,34 @@ import ( "github.com/go-kit/kit/log/level" ) +type SyncOptions struct { + VulnPath string + CPEDBURL string + CPETranslationsURL string + CVEFeedPrefixURL string +} + // Sync downloads all the vulnerability data sources. -func Sync(vulnPath string, cpeDatabaseURL string) error { +func Sync(opts SyncOptions) error { client := fleethttp.NewClient() - if err := DownloadCPEDatabase(vulnPath, client, WithCPEURL(cpeDatabaseURL)); err != nil { + if err := DownloadCPEDB(opts.VulnPath, client, opts.CPEDBURL); err != nil { return fmt.Errorf("sync CPE database: %w", err) } - if err := DownloadNVDCVEFeed(vulnPath, ""); err != nil { + if err := DownloadCPETranslations(opts.VulnPath, client, opts.CPETranslationsURL); err != nil { + return fmt.Errorf("sync CPE translations: %w", err) + } + + if err := DownloadNVDCVEFeed(opts.VulnPath, opts.CVEFeedPrefixURL); err != nil { return fmt.Errorf("sync NVD CVE feed: %w", err) } - if err := DownloadEPSSFeed(vulnPath, client); err != nil { + if err := DownloadEPSSFeed(opts.VulnPath, client); err != nil { return fmt.Errorf("sync EPSS CVE feed: %w", err) } - if err := DownloadCISAKnownExploitsFeed(vulnPath, client); err != nil { + if err := DownloadCISAKnownExploitsFeed(opts.VulnPath, client); err != nil { return 
fmt.Errorf("sync CISA known exploits feed: %w", err) } diff --git a/server/vulnerabilities/sync_test.go b/server/vulnerabilities/sync_test.go index 4e569108d..8c24c7510 100644 --- a/server/vulnerabilities/sync_test.go +++ b/server/vulnerabilities/sync_test.go @@ -71,3 +71,16 @@ func TestLoadCVEMeta(t *testing.T) { require.Equal(t, float64(0.01843), *meta.EPSSProbability) require.Equal(t, true, *meta.CISAKnownExploit) } + +func TestDownloadCPETranslations(t *testing.T) { + nettest.Run(t) + + client := fleethttp.NewClient() + + tempDir := t.TempDir() + + err := DownloadCPETranslations(tempDir, client, "") + require.NoError(t, err) + + assert.FileExists(t, filepath.Join(tempDir, cpeTranslationsFilename)) +}