Use NVD API 2.0 to download CVE information (#15102)

#14888

@getvictor This is ready for review, but keeping as draft as there are
probably many tests that need amending.

I used the new version of the `./tools/nvd/nvdvuln/nvdvuln.go` to
compare the current vulnerabilities found in our dogfood environment
with the vulnerabilities found by the code in this PR and both results
match:
```
go run -race -tags fts5 ./tools/nvd/nvdvuln/nvdvuln.go --debug --db_dir ./local --software_from_url <dogfood URL> --software_from_api_token <API_TOKEN> --sync 2>&1 | tee out.txt
[...]
CVEs found and expected matched!
```

- [X] Changes file added for user-visible changes in `changes/` or
`orbit/changes/`.
See [Changes
files](https://fleetdm.com/docs/contributing/committing-changes#changes-files)
for more information.
- [ ] Added/updated tests
- [X] Manual QA for all new/changed functionality

---------

Co-authored-by: Victor Lyuboslavsky <victor@fleetdm.com>
Co-authored-by: Victor Lyuboslavsky <victor.lyuboslavsky@gmail.com>
This commit is contained in:
Lucas Manuel Rodriguez 2023-11-21 15:30:07 -03:00 committed by GitHub
parent b961c8e912
commit 4194c44131
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 1562 additions and 203 deletions

View File

@ -0,0 +1 @@
* Fleet now uses the 2.0 API to download CVE information from NVD.

182
cmd/cve/generate.go Normal file
View File

@ -0,0 +1,182 @@
package main
import (
"bufio"
"compress/gzip"
"crypto/sha256"
"encoding/hex"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/fleetdm/fleet/v4/server/vulnerabilities/nvd"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
)
// emptyData is a legacy-format NVD CVE feed document with an empty CVE_Items
// list. createEmptyFiles gzips it to produce the "modified" and "recent" feeds.
//
// NOTE(review): CVE_data_numberOfCVEs is "859" although CVE_Items is empty —
// confirm that consumers of these files ignore the count field.
const emptyData = `{
"CVE_data_type" : "CVE",
"CVE_data_format" : "MITRE",
"CVE_data_version" : "4.0",
"CVE_data_numberOfCVEs" : "859",
"CVE_data_timestamp" : "2023-11-17T19:00Z",
"CVE_Items" : [ ]
}`
func main() {
dbDir := flag.String("db_dir", "/tmp/vulndbs", "Path to the vulnerability database")
debug := flag.Bool("debug", false, "Sets debug mode")
flag.Parse()
logger := log.NewJSONLogger(os.Stdout)
if *debug {
logger = level.NewFilter(logger, level.AllowDebug())
} else {
logger = level.NewFilter(logger, level.AllowInfo())
}
if err := os.MkdirAll(*dbDir, os.ModePerm); err != nil {
panic(err)
}
// Sync the CVE files
if err := nvd.DownloadNVDCVEFeed(*dbDir, "", *debug, logger); err != nil {
panic(err)
}
// Read in every cpe file and create a corresponding metadata file
// nvd data feeds start in 2002
logger.Log("msg", "Generating metadata files ...")
const startingYear = 2002
currentYear := time.Now().Year()
if currentYear < startingYear {
panic("system date is in the past, cannot continue")
}
entries := (currentYear - startingYear) + 1
for i := 0; i < entries; i++ {
year := startingYear + i
suffix := strconv.Itoa(year)
fileNameRaw := filepath.Join(*dbDir, fileFmt(suffix, "json", ""))
fileName := filepath.Join(*dbDir, fileFmt(suffix, "json", "gz"))
metaName := filepath.Join(*dbDir, fileFmt(suffix, "meta", ""))
compressFile(fileNameRaw, fileName)
createMetadata(fileName, metaName)
}
// Create modified and recent files
createEmptyFiles(*dbDir, "modified")
createEmptyFiles(*dbDir, "recent")
}
// compressFile gzip-compresses fileName into newFileName and then removes
// fileName. It panics on any error.
//
// The content is streamed rather than loaded into memory at once, and the
// gzip writer and the destination file are closed with their errors checked:
// gzip.Writer.Close flushes the remaining compressed data, so ignoring its
// error could leave a silently truncated archive behind.
func compressFile(fileName string, newFileName string) {
	src, err := os.Open(fileName)
	if err != nil {
		panic(err)
	}
	// Safety net for the panic paths; the regular path closes explicitly below.
	defer src.Close()

	dst, err := os.Create(newFileName)
	if err != nil {
		panic(err)
	}
	defer dst.Close()

	gz := gzip.NewWriter(dst)
	if _, err := io.Copy(gz, bufio.NewReader(src)); err != nil {
		panic(err)
	}
	if err := gz.Close(); err != nil {
		panic(err)
	}
	if err := dst.Close(); err != nil {
		panic(err)
	}

	// Close the source before removing it (required on some platforms).
	// A close error on a read-only handle carries no data-loss risk.
	_ = src.Close()
	if err := os.Remove(fileName); err != nil {
		panic(err)
	}
}
// createMetadata writes metaName, the .meta companion of the gzip file
// fileName. The file holds two CRLF-terminated lines: "gzSize:" followed by
// the size of fileName in bytes, and "sha256:" followed by the upper-case
// SHA-256 of its decompressed content. It panics on any error.
func createMetadata(fileName string, metaName string) {
	info, err := os.Stat(fileName)
	if err != nil {
		panic(err)
	}
	digest, err := gunzipFileAndComputeSHA256(fileName)
	if err != nil {
		panic(err)
	}

	out, err := os.Create(metaName)
	if err != nil {
		panic(err)
	}
	defer out.Close()

	entries := []string{
		fmt.Sprintf("gzSize:%v\r\n", info.Size()),
		fmt.Sprintf("sha256:%v\r\n", digest),
	}
	for _, entry := range entries {
		if _, err := out.WriteString(entry); err != nil {
			panic(err)
		}
	}
}
// createEmptyFiles writes a gzip-compressed feed containing emptyData for the
// given suffix under baseDir, together with its .meta file.
// It panics if the data file cannot be created or written.
func createEmptyFiles(baseDir, suffix string) {
	gzPath := filepath.Join(baseDir, fileFmt(suffix, "json", "gz"))
	metaPath := filepath.Join(baseDir, fileFmt(suffix, "meta", ""))

	out, err := os.Create(gzPath)
	if err != nil {
		panic(err)
	}

	gz := gzip.NewWriter(out)
	_, err = gz.Write([]byte(emptyData))
	if err == nil {
		// Closing the gzip writer flushes the compressed stream.
		err = gz.Close()
	}
	if err != nil {
		panic(err)
	}
	out.Close()

	createMetadata(gzPath, metaPath)
}
// fileFmt builds a feed file name of the form
// "nvdcve-1.1-<suffix>.<encoding>[.<compression>]". The compression extension
// is appended only when compression is non-empty.
func fileFmt(suffix, encoding, compression string) string {
	const version = "1.1"
	name := fmt.Sprintf("nvdcve-%s-%s.%s", version, suffix, encoding)
	if compression == "" {
		return name
	}
	return name + "." + compression
}
// computeSHA256 consumes r until EOF and returns the SHA-256 digest of
// everything read, encoded as upper-case hexadecimal.
func computeSHA256(r io.Reader) (string, error) {
	digest := sha256.New()
	if _, err := io.Copy(digest, r); err != nil {
		return "", err
	}
	return strings.ToUpper(hex.EncodeToString(digest.Sum(nil))), nil
}
// gunzipAndComputeSHA256 treats r as a gzip stream and returns the upper-case
// hex SHA-256 of the decompressed content. It fails if r does not start with
// a valid gzip header.
func gunzipAndComputeSHA256(r io.Reader) (string, error) {
	zr, err := gzip.NewReader(r)
	if err != nil {
		return "", err
	}
	defer zr.Close()

	return computeSHA256(zr)
}
// gunzipFileAndComputeSHA256 opens the gzip file at filename and returns the
// upper-case hex SHA-256 of its decompressed content.
func gunzipFileAndComputeSHA256(filename string) (string, error) {
	gzFile, err := os.Open(filename)
	if err != nil {
		return "", err
	}
	defer gzFile.Close()

	return gunzipAndComputeSHA256(gzFile)
}

View File

@ -9,6 +9,7 @@ import (
"github.com/fleetdm/fleet/v4/server/vulnerabilities/msrc"
"github.com/fleetdm/fleet/v4/server/vulnerabilities/nvd"
"github.com/fleetdm/fleet/v4/server/vulnerabilities/oval"
klog "github.com/go-kit/log"
"github.com/urfave/cli/v2"
)
@ -58,7 +59,7 @@ Downloads (if needed) the data streams that can be used by the Fleet server to p
log(c, " Done\n")
log(c, "[-] Downloading NVD CVE feed...")
err = nvd.DownloadNVDCVEFeed(dir, "")
err = nvd.DownloadNVDCVEFeed(dir, "", false, klog.NewNopLogger())
if err != nil {
return err
}

View File

@ -2,9 +2,9 @@ package main
import (
"fmt"
"os"
"path"
"testing"
"time"
"github.com/fleetdm/fleet/v4/pkg/nettest"
"github.com/stretchr/testify/assert"
@ -28,6 +28,10 @@ func TestVulnerabilityDataStream(t *testing.T) {
[+] Data streams successfully downloaded!
`
// Set start and end indexes otherwise a full sync using the NVD API 2.0 takes a long time (>15m).
os.Setenv("NETWORK_TEST_NVD_CVE_START_IDX", "220000")
os.Setenv("NETWORK_TEST_NVD_CVE_END_IDX", "226000")
var actualOutput string
err := nettest.RunWithNetRetry(t, func() error {
w, err := runAppNoChecks([]string{"vulnerability-data-stream", "--dir", vulnPath})
@ -41,17 +45,13 @@ func TestVulnerabilityDataStream(t *testing.T) {
files := []string{
"cpe.sqlite",
"nvdcve-1.1-modified.json.gz",
"nvdcve-1.1-recent.json.gz",
"epss_scores-current.csv",
"known_exploited_vulnerabilities.json",
}
currentYear := time.Now().Year()
for y := 2002; y <= currentYear; y++ {
for y := 2008; y <= 2023; y++ {
files = append(
files,
fmt.Sprintf("nvdcve-1.1-%d.json.gz", y),
fmt.Sprintf("nvdcve-1.1-%d.meta", y),
fmt.Sprintf("nvdcve-1.1-%d.json", y),
)
}
for _, file := range files {

View File

@ -2363,10 +2363,12 @@ If this value is not defined, Fleet checks for the latest release in Github and
##### cve_feed_prefix_url
Like the CPE dictionary, we allow users to define where to get the CVE feeds.
In this case, the URL should be a host that serves the files in the path /feeds/json/cve/1.1/.
Fleet expects to find all the JSON Feeds that can be found in https://nvd.nist.gov/vuln/data-feeds.
When not defined, Fleet downloads from the nvd.nist.gov host.
Like the CPE dictionary, we allow users to define where to get the legacy CVE feeds from.
In this case, the URL should be a host that serves the files in the legacy feed format.
Fleet expects to find all the GZ and META files that can be found in https://nvd.nist.gov/vuln/data-feeds#JSON_FEED.
For example: `FLEET_VULNERABILITIES_CVE_FEED_PREFIX_URL` + `/nvdcve-1.1-2002.meta`
When not defined, Fleet downloads CVE information from the nvd.nist.gov host using the NVD 2.0 API.
- Default value: `""`
- Environment variable: `FLEET_VULNERABILITIES_CVE_FEED_PREFIX_URL`

2
go.mod
View File

@ -78,7 +78,7 @@ require (
github.com/open-policy-agent/opa v0.44.0
github.com/oschwald/geoip2-golang v1.8.0
github.com/osquery/osquery-go v0.0.0-20230603132358-d2e851b3991b
github.com/pandatix/nvdapi v0.6.4
github.com/pandatix/nvdapi v0.6.4
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.13.0

View File

@ -106,7 +106,7 @@ func TestAnalyzer(t *testing.T) {
require.Nil(t, bulletin)
})
t.Run("returns the lastest bulletin", func(t *testing.T) {
t.Run("returns the latest bulletin", func(t *testing.T) {
d := time.Now()
dir := t.TempDir()

View File

@ -2,6 +2,7 @@ package nvd
import (
"context"
"errors"
"fmt"
"io/fs"
"net/url"
@ -21,6 +22,7 @@ import (
"github.com/facebookincubator/nvdtools/wfn"
"github.com/fleetdm/fleet/v4/server/contexts/ctxerr"
"github.com/fleetdm/fleet/v4/server/fleet"
nvdsync "github.com/fleetdm/fleet/v4/server/vulnerabilities/nvd/sync"
"github.com/go-kit/log"
kitlog "github.com/go-kit/log"
"github.com/go-kit/log/level"
@ -32,21 +34,45 @@ var semverPattern = regexp.MustCompile(`^v?(\d+\.\d+\.\d+)`)
// Define a regex pattern for splitting version strings into subparts
var nonNumericPartRegex = regexp.MustCompile(`(\d+)(\D.*)`)
// DownloadNVDCVEFeed downloads the NVD CVE feed. Skips downloading if the cve feed has not changed since the last time.
func DownloadNVDCVEFeed(vulnPath string, cveFeedPrefixURL string) error {
cve := nvd.SupportedCVE["cve-1.1.json.gz"]
source := nvd.NewSourceConfig()
// DownloadNVDCVEFeed downloads CVE information from a CVE source.
// If cveFeedPrefixURL is not set, the NVD API 2.0 is used to download CVE information to vulnPath.
// If cveFeedPrefixURL is set, the CVE information will be downloaded assuming NVD's legacy feed format.
func DownloadNVDCVEFeed(vulnPath string, cveFeedPrefixURL string, debug bool, logger log.Logger) error {
if cveFeedPrefixURL != "" {
parsed, err := url.Parse(cveFeedPrefixURL)
if err != nil {
return fmt.Errorf("parsing cve feed url prefix override: %w", err)
}
source.Host = parsed.Host
source.CVEFeedPath = parsed.Path
source.Scheme = parsed.Scheme
return downloadNVDCVELegacy(vulnPath, cveFeedPrefixURL)
}
cveSyncer, err := nvdsync.NewCVE(
vulnPath,
nvdsync.WithLogger(logger),
nvdsync.WithDebug(debug),
)
if err != nil {
return err
}
if err := cveSyncer.Do(context.Background()); err != nil {
return fmt.Errorf("download nvd cve feed: %w", err)
}
return nil
}
func downloadNVDCVELegacy(vulnPath string, cveFeedPrefixURL string) error {
if cveFeedPrefixURL == "" {
return errors.New("missing cve_feed_prefix_url")
}
source := nvd.NewSourceConfig()
parsed, err := url.Parse(cveFeedPrefixURL)
if err != nil {
return fmt.Errorf("parsing cve feed url prefix override: %w", err)
}
source.Host = parsed.Host
source.CVEFeedPath = parsed.Path
source.Scheme = parsed.Scheme
cve := nvd.SupportedCVE["cve-1.1.json.gz"]
dfs := nvd.Sync{
Feeds: []nvd.Syncer{cve},
Source: source,
@ -64,13 +90,12 @@ func DownloadNVDCVEFeed(vulnPath string, cveFeedPrefixURL string) error {
if err := dfs.Do(ctx); err != nil {
return fmt.Errorf("download nvd cve feed: %w", err)
}
return nil
}
const publishedDateFmt = "2006-01-02T15:04Z" // not quite RFC3339
var rxNVDCVEArchive = regexp.MustCompile(`nvdcve.*\.gz$`)
var rxNVDCVEArchive = regexp.MustCompile(`nvdcve.*\.json.*$`)
func getNVDCVEFeedFiles(vulnPath string) ([]string, error) {
var files []string

View File

@ -15,6 +15,7 @@ import (
"github.com/fleetdm/fleet/v4/pkg/nettest"
"github.com/fleetdm/fleet/v4/server/fleet"
"github.com/fleetdm/fleet/v4/server/mock"
"github.com/go-kit/kit/log"
kitlog "github.com/go-kit/kit/log"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@ -134,7 +135,10 @@ func TestTranslateCPEToCVE(t *testing.T) {
// download the CVEs once for all sub-tests, and then disable syncing
err := nettest.RunWithNetRetry(t, func() error {
return DownloadNVDCVEFeed(tempDir, "")
// We use cveFeedPrefixURL="https://nvd.nist.gov/feeds/json/cve/1.1/" because a full sync
// with the NVD API 2.0 takes a long time (>15m). These feeds will be deprecated
// on December 15th and this test will start failing then.
return DownloadNVDCVEFeed(tempDir, "https://nvd.nist.gov/feeds/json/cve/1.1/", false, log.NewNopLogger())
})
require.NoError(t, err)
@ -345,7 +349,7 @@ func TestSyncsCVEFromURL(t *testing.T) {
tempDir := t.TempDir()
cveFeedPrefixURL := ts.URL + "/feeds/json/cve/1.1/"
err := DownloadNVDCVEFeed(tempDir, cveFeedPrefixURL)
err := DownloadNVDCVEFeed(tempDir, cveFeedPrefixURL, false, log.NewNopLogger())
require.Error(t, err)
require.Contains(t,
err.Error(),

View File

@ -30,22 +30,29 @@ type SyncOptions struct {
CPEDBURL string
CPETranslationsURL string
CVEFeedPrefixURL string
Debug bool
}
// Sync downloads all the vulnerability data sources.
func Sync(opts SyncOptions, logger log.Logger) error {
level.Debug(logger).Log("msg", "syncing CPE sqlite")
start := time.Now()
if err := DownloadCPEDBFromGithub(opts.VulnPath, opts.CPEDBURL); err != nil {
return fmt.Errorf("sync CPE database: %w", err)
}
level.Debug(logger).Log("msg", "CPE sqlite synced", "duration", time.Since(start))
level.Debug(logger).Log("msg", "downloading CPE translations", "url", opts.CPETranslationsURL)
if err := DownloadCPETranslationsFromGithub(opts.VulnPath, opts.CPETranslationsURL); err != nil {
return fmt.Errorf("sync CPE translations: %w", err)
}
if err := DownloadNVDCVEFeed(opts.VulnPath, opts.CVEFeedPrefixURL); err != nil {
level.Debug(logger).Log("msg", "syncing CVEs")
start = time.Now()
if err := DownloadNVDCVEFeed(opts.VulnPath, opts.CVEFeedPrefixURL, opts.Debug, logger); err != nil {
return fmt.Errorf("sync NVD CVE feed: %w", err)
}
level.Debug(logger).Log("msg", "CVEs synced", "duration", time.Since(start))
if err := DownloadEPSSFeed(opts.VulnPath); err != nil {
return fmt.Errorf("sync EPSS CVE feed: %w", err)

View File

@ -0,0 +1,756 @@
// Package nvdsync provides a CVE syncer that uses the NVD 2.0 API to download CVE information
// and stores it in the legacy format. The reason we decided to store in the legacy format is because
// the github.com/facebookincubator/nvdtools doesn't yet support parsing the new API 2.0 JSON format.
package nvdsync
import (
"context"
"encoding/json"
"errors"
"fmt"
"io/fs"
"net/http"
"os"
"path/filepath"
"sort"
"strconv"
"time"
"github.com/facebookincubator/nvdtools/cvefeed/nvd/schema"
"github.com/fleetdm/fleet/v4/orbit/pkg/constant"
"github.com/fleetdm/fleet/v4/pkg/fleethttp"
"github.com/fleetdm/fleet/v4/server/ptr"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/pandatix/nvdapi/common"
"github.com/pandatix/nvdapi/v2"
)
// CVE syncs CVE information from the NVD database (nvd.nist.gov) using its API 2.0.
// It stores the CVE information using the legacy feed format.
// The reason we decided to store in the legacy format is because
// the github.com/facebookincubator/nvdtools doesn't yet support parsing
// the new API 2.0 JSON format.
//
// Create instances with NewCVE and run a synchronization with Do.
type CVE struct {
// client is the HTTP client used to perform the NVD API requests.
client *http.Client
// dbDir is the local directory where the per-year CVE files and the
// last_mod_start_date.txt bookkeeping file are stored.
dbDir string
// logger receives the syncer's log output (see WithLogger).
logger log.Logger
// debug makes the HTTP client wrapper print requests and responses to
// stderr (see WithDebug).
debug bool
}
// Tunables of the sync loop.
// NOTE(review): these are declared as variables rather than constants,
// presumably so tests can shorten them — confirm.
var (
// timeBetweenRequests is the recommended time to wait between NVD API requests.
// The time spent on the request itself and on flushing a year file is
// subtracted from it before sleeping.
timeBetweenRequests = 6 * time.Second
// maxRetryAttempts is the maximum number of consecutive requests to retry in case of API failure.
// The counter is reset after every successful request.
maxRetryAttempts = 10
// waitTimeForRetry is the time to wait between retries.
waitTimeForRetry = 30 * time.Second
)
// CVEOption allows configuring a CVE syncer.
// Options are passed to NewCVE, which applies them in order to the syncer
// after it has been given its default values.
type CVEOption func(*CVE)
// WithLogger returns an option that makes the CVE syncer log to logger.
//
// When the option is not used, the syncer logs to log.NewNopLogger().
func WithLogger(logger log.Logger) CVEOption {
	return func(syncer *CVE) {
		syncer.logger = logger
	}
}
// WithDebug returns an option that turns the debug mode of the CVE syncer on
// or off.
//
// When the option is not used, debug mode is off.
func WithDebug(debug bool) CVEOption {
	return func(syncer *CVE) {
		syncer.debug = debug
	}
}
// NewCVE builds a CVE syncer that stores and updates NVD CVE information in
// the local directory dbDir.
//
// It returns an error when dbDir is empty. The syncer starts with a default
// HTTP client and a no-op logger; opts are then applied in order.
func NewCVE(dbDir string, opts ...CVEOption) (*CVE, error) {
	if dbDir == "" {
		return nil, errors.New("directory not set")
	}

	syncer := &CVE{
		client: fleethttp.NewClient(),
		dbDir:  dbDir,
		logger: log.NewNopLogger(),
	}
	for _, apply := range opts {
		apply(syncer)
	}
	return syncer, nil
}
// lastModStartDateFilePath returns the path, inside the DB directory, of the
// file that records the lastModStartDate to use on the next synchronization.
func (s *CVE) lastModStartDateFilePath() string {
	const fileName = "last_mod_start_date.txt"
	return filepath.Join(s.dbDir, fileName)
}
// Do synchronizes the local DB directory with the NVD service.
//
// The presence of the lastModStartDate file decides the mode: when it exists
// only the updates since the previous run are fetched, otherwise a full
// initial synchronization is performed.
func (s *CVE) Do(ctx context.Context) error {
	alreadySynced, err := fileExists(s.lastModStartDateFilePath())
	if err != nil {
		return err
	}

	if alreadySynced {
		level.Debug(s.logger).Log("msg", "NVD CVE update")
		return s.update(ctx)
	}

	level.Debug(s.logger).Log("msg", "initial NVD CVE sync")
	return s.initSync(ctx)
}
// initSync runs the first (full) synchronization: it clears feed files left
// in the DB directory, downloads all CVEs and records the lastModStartDate
// returned by the download for the next run.
func (s *CVE) initSync(ctx context.Context) error {
	// Remove any legacy feeds from previous versions of Fleet.
	if err := s.removeLegacyFeeds(); err != nil {
		return err
	}

	// A nil lastModStartDate requests the download of all CVE information.
	nextStartDate, err := s.sync(ctx, nil)
	if err != nil {
		return err
	}

	// Persist the date to be used by the next sync.
	return s.writeLastModStartDateFile(nextStartDate)
}
// removeLegacyFeeds deletes every file in the DB directory matching
// "nvdcve-1.1-*.json*" or "nvdcve-1.1-*.meta", i.e. the legacy feed files
// downloaded by previous versions of Fleet.
//
// All matches are collected before anything is removed; the first failure
// aborts and is returned.
func (s *CVE) removeLegacyFeeds() error {
	patterns := []string{
		// The trailing * also matches uncompressed files of unfinished syncs.
		"nvdcve-1.1-*.json*",
		"nvdcve-1.1-*.meta",
	}

	var stale []string
	for _, pattern := range patterns {
		matches, err := filepath.Glob(filepath.Join(s.dbDir, pattern))
		if err != nil {
			return err
		}
		stale = append(stale, matches...)
	}

	for _, path := range stale {
		level.Debug(s.logger).Log("msg", "removing legacy feed file", "path", path)
		if err := os.Remove(path); err != nil {
			return err
		}
	}
	return nil
}
// update fetches the CVE changes published since the previous
// synchronization and records the new lastModStartDate for the next one.
func (s *CVE) update(ctx context.Context) error {
	// The start date was stored by the previous synchronization.
	stored, err := os.ReadFile(s.lastModStartDateFilePath())
	if err != nil {
		return err
	}
	since := string(stored)

	// Get the new CVE updates since the previous synchronization.
	nextStartDate, err := s.sync(ctx, &since)
	if err != nil {
		return err
	}

	// Update the stored date for the next synchronization.
	return s.writeLastModStartDateFile(nextStartDate)
}
// updateYearFile merges cves (API 2.0 format) into the legacy-format file of
// the given year: entries whose ID is already stored are replaced in place,
// the remaining ones are appended, and the file is written back.
//
// Years before 2002 are stored in the 2002 file.
func (s *CVE) updateYearFile(year int, cves []nvdapi.CVEItem) error {
	// The NVD legacy feed files start at year 2002.
	// This is assumed by the facebookincubator/nvdtools package.
	if year < 2002 {
		year = 2002
	}

	// Load what is currently stored for the year.
	readStart := time.Now()
	feed, err := readCVEsLegacyFormat(s.dbDir, year)
	if err != nil {
		return err
	}
	level.Debug(s.logger).Log("msg", "read cves", "year", year, "duration", time.Since(readStart))

	// Convert the incoming CVEs to the legacy format, indexed by CVE ID.
	pending := make(map[string]*schema.NVDCVEFeedJSON10DefCVEItem)
	for _, item := range cves {
		converted := convertAPI20CVEToLegacy(item, s.logger)
		pending[converted.CVE.CVEDataMeta.ID] = converted
	}

	// Replace stored CVEs that received an update (e.g. NVD changed a CVSS
	// metric on an existing CVE). Replaced IDs are dropped from pending, so
	// whatever is left afterwards is new.
	updateStart := time.Now()
	for idx := range feed.CVEItems {
		id := feed.CVEItems[idx].CVE.CVEDataMeta.ID
		replacement, found := pending[id]
		if !found {
			continue
		}
		feed.CVEItems[idx] = replacement
		delete(pending, id)
	}
	level.Debug(s.logger).Log("msg", "updated cves", "year", year, "duration", time.Since(updateStart))

	// Append the CVEs that were not stored before (newly reported CVE numbers).
	for _, added := range pending {
		feed.CVEItems = append(feed.CVEItems, added)
	}
	feed.CVEDataNumberOfCVEs = strconv.Itoa(len(feed.CVEItems))

	// Write the year file back.
	storeStart := time.Now()
	if err := storeCVEsInLegacyFormat(s.dbDir, year, feed); err != nil {
		return err
	}
	level.Debug(s.logger).Log("msg", "stored cves", "year", year, "duration", time.Since(storeStart))

	return nil
}
// writeLastModStartDateFile stores lastModStartDate in the bookkeeping file
// of the local DB directory, replacing any previous content.
func (s *CVE) writeLastModStartDateFile(lastModStartDate string) error {
	path := s.lastModStartDateFilePath()
	content := []byte(lastModStartDate)
	return os.WriteFile(path, content, constant.DefaultWorldReadableFileMode)
}
// httpClient wraps an http.Client to allow for debug and setting a request context.
// The context is kept in the struct because the Do method it implements
// receives only the request.
type httpClient struct {
*http.Client
// ctx is attached to every request sent through Do.
ctx context.Context
// debug enables printing of requests and responses to stderr.
debug bool
}
// Do implements common.HTTPClient.
//
// The request is sent with the wrapper's context attached. In debug mode the
// request is printed to stderr before it is sent, and the response (with the
// elapsed time) after it is received. A failed request returns a nil response.
func (c *httpClient) Do(request *http.Request) (*http.Response, error) {
	began := time.Now()
	if c.debug {
		fmt.Fprintf(os.Stderr, "%s, request: %+v\n", time.Now(), request)
	}

	response, err := c.Client.Do(request.WithContext(c.ctx))
	if err != nil {
		return nil, err
	}

	if c.debug {
		fmt.Fprintf(os.Stderr, "%s (%s) response: %+v\n", time.Now(), time.Since(began), response)
	}
	return response, nil
}
// getHTTPClient wraps the syncer's HTTP client into a common.HTTPClient for
// the nvdapi methods, binding ctx and the debug flag to it.
func (s *CVE) getHTTPClient(ctx context.Context, debug bool) common.HTTPClient {
	wrapped := httpClient{
		Client: s.client,
		ctx:    ctx,
		debug:  debug,
	}
	return &wrapped
}
// sync performs requests to the NVD https://services.nvd.nist.gov/rest/json/cves/2.0 service to get CVE information
// and updates the files in the local directory.
// It returns the lastModStartDate to use on a subsequent sync call (the
// timestamp of the last response received).
//
// If lastModStartDate is nil, it performs the initial (full) synchronization of ALL CVEs.
// If lastModStartDate is set, then it fetches updates since the last sync call.
//
// A failed request is retried after waitTimeForRetry, at most maxRetryAttempts
// times in a row. The method returns an error if a response carries a CVE
// without a usable ID, or if a response makes no paging progress while more
// results are still expected (which would otherwise loop forever).
//
// Reference: https://nvd.nist.gov/developers/api-workflows.
func (s *CVE) sync(ctx context.Context, lastModStartDate *string) (newLastModStartDate string, err error) {
	var (
		startIdx                = int64(0)
		totalResults            = 1
		cvesByYear              = make(map[int][]nvdapi.CVEItem)
		retryAttempts           = 0
		lastModEndDate          *string
		now                     = time.Now().UTC().Format("2006-01-02T15:04:05.000")
		vulnerabilitiesReceived = 0
	)

	// An end date is only sent together with a start date.
	if lastModStartDate != nil {
		lastModEndDate = ptr.String(now)
	}

	// Environment variable NETWORK_TEST_NVD_CVE_START_IDX is set only in tests
	// (to reduce test duration time).
	if v := os.Getenv("NETWORK_TEST_NVD_CVE_START_IDX"); v != "" {
		startIdx, err = strconv.ParseInt(v, 10, 32)
		if err != nil {
			return "", err
		}
		totalResults = int(startIdx) + 1
	}

	for startIndex := int(startIdx); startIndex < totalResults; {
		startRequestTime := time.Now()
		cveResponse, err := nvdapi.GetCVEs(s.getHTTPClient(ctx, s.debug), nvdapi.GetCVEsParams{
			StartIndex:       ptr.Int(startIndex),
			LastModStartDate: lastModStartDate,
			LastModEndDate:   lastModEndDate,
		})
		if err != nil {
			// Give up once maxRetryAttempts consecutive retries have been made.
			if retryAttempts >= maxRetryAttempts {
				return "", err
			}
			s.logger.Log("msg", "NVD request returned error", "err", err, "retry-in", waitTimeForRetry)
			retryAttempts++
			select {
			case <-ctx.Done():
				return "", ctx.Err()
			case <-time.After(waitTimeForRetry):
				continue
			}
		}
		requestDuration := time.Since(startRequestTime)
		retryAttempts = 0
		totalResults = cveResponse.TotalResults
		startIndex += cveResponse.ResultsPerPage
		newLastModStartDate = cveResponse.Timestamp

		// Environment variable NETWORK_TEST_NVD_CVE_END_IDX is set only in tests
		// (to reduce test duration time).
		if v := os.Getenv("NETWORK_TEST_NVD_CVE_END_IDX"); v != "" {
			endIdx, err := strconv.ParseInt(v, 10, 32)
			if err != nil {
				return "", err
			}
			totalResults = int(endIdx)
		}

		// Without this guard an empty page would make the loop request the same
		// start index forever.
		if cveResponse.ResultsPerPage <= 0 && startIndex < totalResults {
			return "", fmt.Errorf(
				"sync made no progress: empty page at start index %d with %d total results",
				startIndex, totalResults,
			)
		}

		for _, vuln := range cveResponse.Vulnerabilities {
			// IDs look like "CVE-YYYY-NNNN"; the year sits at bytes 4..8.
			var id string
			if vuln.CVE.ID != nil {
				id = *vuln.CVE.ID
			}
			if len(id) < 8 {
				return "", fmt.Errorf("unexpected CVE ID in NVD response: %q", id)
			}
			year, err := strconv.Atoi(id[4:8])
			if err != nil {
				return "", err
			}
			vulnerabilitiesReceived++
			cvesByYear[year] = append(cvesByYear[year], vuln)
		}

		// Dump vulnerabilities to the year files to reduce memory footprint.
		// Keeping all vulnerabilities in memory consumed around 11 GB of RAM.
		// Only the year holding the most buffered CVEs is flushed each time.
		var updateDuration time.Duration
		if vulnerabilitiesReceived > 10_000 {
			var (
				yearWithMostVulns int
				maxVulnsInYear    int
			)
			for year, cvesInYear := range cvesByYear {
				if len(cvesInYear) > maxVulnsInYear {
					yearWithMostVulns = year
					maxVulnsInYear = len(cvesInYear)
				}
			}
			start := time.Now()
			if err := s.updateYearFile(yearWithMostVulns, cvesByYear[yearWithMostVulns]); err != nil {
				return "", err
			}
			updateDuration = time.Since(start)
			level.Debug(s.logger).Log("msg", "updated file", "year", yearWithMostVulns, "duration", updateDuration, "vulns", maxVulnsInYear)
			vulnerabilitiesReceived -= maxVulnsInYear
			delete(cvesByYear, yearWithMostVulns)
		}

		// Pace the requests; time already spent on the request and on the file
		// update counts towards the wait (a non-positive wait fires at once).
		if startIndex < totalResults {
			select {
			case <-ctx.Done():
				return "", ctx.Err()
			case <-time.After(timeBetweenRequests - requestDuration - updateDuration):
			}
		}
	}

	// Flush whatever is still buffered.
	for year, cvesInYear := range cvesByYear {
		start := time.Now()
		if err := s.updateYearFile(year, cvesInYear); err != nil {
			return "", err
		}
		level.Debug(s.logger).Log("msg", "updated file", "year", year, "duration", time.Since(start), "vulns", len(cvesInYear))
	}

	return newLastModStartDate, nil
}
// fileExists reports whether something exists at path.
// A "does not exist" failure yields (false, nil); any other failure of the
// underlying stat call is returned as an error.
func fileExists(path string) (bool, error) {
	_, err := os.Stat(path)
	if err == nil {
		return true, nil
	}
	if errors.Is(err, fs.ErrNotExist) {
		return false, nil
	}
	return false, err
}
// storeCVEsInLegacyFormat sorts the CVE items of cveFeed by CVE ID (in place)
// and writes the feed as indented JSON to "nvdcve-1.1-<year>.json" in dbDir,
// replacing any existing file.
func storeCVEsInLegacyFormat(dbDir string, year int, cveFeed *schema.NVDCVEFeedJSON10) error {
	items := cveFeed.CVEItems // shares the backing array, so the feed itself gets sorted
	sort.Slice(items, func(a, b int) bool {
		return items[a].CVE.CVEDataMeta.ID < items[b].CVE.CVEDataMeta.ID
	})

	out, err := os.Create(filepath.Join(dbDir, fmt.Sprintf("nvdcve-1.1-%d.json", year)))
	if err != nil {
		return err
	}
	// Covers the error path; the success path reports the close error below.
	defer out.Close()

	encoder := json.NewEncoder(out)
	encoder.SetIndent("", " ")
	if err := encoder.Encode(cveFeed); err != nil {
		return err
	}
	return out.Close()
}
// readCVEsLegacyFormat loads the CVEs stored in the legacy feed format from
// "nvdcve-1.1-<year>.json" in dbDir.
//
// If the file does not exist, an empty feed is returned (no error) whose
// header fields are pre-populated. Its timestamp is taken in UTC: the layout
// ends in a literal "Z", so formatting local time would mislabel it as UTC.
func readCVEsLegacyFormat(dbDir string, year int) (*schema.NVDCVEFeedJSON10, error) {
	path := filepath.Join(dbDir, fmt.Sprintf("nvdcve-1.1-%d.json", year))
	file, err := os.Open(path)
	if err != nil {
		if errors.Is(err, fs.ErrNotExist) {
			return &schema.NVDCVEFeedJSON10{
				CVEDataFormat:    "MITRE",
				CVEDataTimestamp: time.Now().UTC().Format("2006-01-02T15:04:05Z"),
				CVEDataType:      "CVE",
				CVEDataVersion:   "4.0",
			}, nil
		}
		return nil, err
	}
	// Covers the error paths; the success path reports the close error below.
	defer file.Close()

	var cveFeed schema.NVDCVEFeedJSON10
	if err := json.NewDecoder(file).Decode(&cveFeed); err != nil {
		return nil, err
	}
	if err := file.Close(); err != nil {
		return nil, err
	}
	return &cveFeed, nil
}
// derefPtr returns the value p points to, or the zero value of T when p is nil.
func derefPtr[T any](p *T) T {
	var zero T
	if p == nil {
		return zero
	}
	return *p
}
// convertAPI20CVEToLegacy performs the conversion of a CVE in API 2.0 format to the legacy feed format.
func convertAPI20CVEToLegacy(cve nvdapi.CVEItem, logger log.Logger) *schema.NVDCVEFeedJSON10DefCVEItem {
logger = log.With(logger, "cve", cve.CVE.ID)
descriptions := make([]*schema.CVEJSON40LangString, 0, len(cve.CVE.Descriptions))
for _, description := range cve.CVE.Descriptions {
// Keep only english descriptions to match the legacy.
if description.Lang != "en" {
continue
}
descriptions = append(descriptions, &schema.CVEJSON40LangString{
Lang: description.Lang,
Value: description.Value,
})
}
problemtypeData := make([]*schema.CVEJSON40ProblemtypeProblemtypeData, 0, len(cve.CVE.Weaknesses))
if len(cve.CVE.Weaknesses) == 0 {
problemtypeData = append(problemtypeData, &schema.CVEJSON40ProblemtypeProblemtypeData{
Description: []*schema.CVEJSON40LangString{},
})
}
for _, weakness := range cve.CVE.Weaknesses {
if weakness.Type != "Primary" {
continue
}
descriptions := make([]*schema.CVEJSON40LangString, 0, len(weakness.Description))
for _, description := range weakness.Description {
descriptions = append(descriptions, &schema.CVEJSON40LangString{
Lang: description.Lang,
Value: description.Value,
})
}
problemtypeData = append(problemtypeData, &schema.CVEJSON40ProblemtypeProblemtypeData{
Description: descriptions,
})
}
referenceData := make([]*schema.CVEJSON40Reference, 0, len(cve.CVE.References))
for _, reference := range cve.CVE.References {
tags := []string{} // Entries that have no tag set an empty list.
if len(reference.Tags) != 0 {
tags = reference.Tags
}
referenceData = append(referenceData, &schema.CVEJSON40Reference{
Name: reference.URL, // Most entries have name set to the URL, and there's no name field on API 2.0.
Refsource: "", // Not available on API 2.0.
Tags: tags,
URL: reference.URL,
})
}
nodes := []*schema.NVDCVEFeedJSON10DefNode{} // Legacy entries define an empty list if there are no nodes.
for _, configuration := range cve.CVE.Configurations {
if configuration.Operator != nil {
children := make([]*schema.NVDCVEFeedJSON10DefNode, 0, len(configuration.Nodes))
for _, node := range configuration.Nodes {
cpeMatches := make([]*schema.NVDCVEFeedJSON10DefCPEMatch, 0, len(node.CPEMatch))
for _, cpeMatch := range node.CPEMatch {
cpeMatches = append(cpeMatches, &schema.NVDCVEFeedJSON10DefCPEMatch{
CPEName: []*schema.NVDCVEFeedJSON10DefCPEName{}, // All entries have this field with an empty array.
Cpe23Uri: cpeMatch.Criteria, // All entries are in CPE 2.3 format.
VersionEndExcluding: derefPtr(cpeMatch.VersionEndExcluding),
VersionEndIncluding: derefPtr(cpeMatch.VersionEndIncluding),
VersionStartExcluding: derefPtr(cpeMatch.VersionStartExcluding),
VersionStartIncluding: derefPtr(cpeMatch.VersionStartIncluding),
Vulnerable: cpeMatch.Vulnerable,
})
}
children = append(children, &schema.NVDCVEFeedJSON10DefNode{
CPEMatch: cpeMatches,
Children: []*schema.NVDCVEFeedJSON10DefNode{},
Negate: derefPtr(node.Negate),
Operator: string(node.Operator),
})
}
nodes = append(nodes, &schema.NVDCVEFeedJSON10DefNode{
CPEMatch: []*schema.NVDCVEFeedJSON10DefCPEMatch{},
Children: children,
Negate: derefPtr(configuration.Negate),
Operator: string(*configuration.Operator),
})
} else {
for _, node := range configuration.Nodes {
cpeMatches := make([]*schema.NVDCVEFeedJSON10DefCPEMatch, 0, len(node.CPEMatch))
for _, cpeMatch := range node.CPEMatch {
cpeMatches = append(cpeMatches, &schema.NVDCVEFeedJSON10DefCPEMatch{
CPEName: []*schema.NVDCVEFeedJSON10DefCPEName{}, // All entries have this field with an empty array.
Cpe23Uri: cpeMatch.Criteria, // All entries are in CPE 2.3 format.
VersionEndExcluding: derefPtr(cpeMatch.VersionEndExcluding),
VersionEndIncluding: derefPtr(cpeMatch.VersionEndIncluding),
VersionStartExcluding: derefPtr(cpeMatch.VersionStartExcluding),
VersionStartIncluding: derefPtr(cpeMatch.VersionStartIncluding),
Vulnerable: cpeMatch.Vulnerable,
})
}
nodes = append(nodes, &schema.NVDCVEFeedJSON10DefNode{
CPEMatch: cpeMatches,
Children: []*schema.NVDCVEFeedJSON10DefNode{},
Negate: *node.Negate,
Operator: string(node.Operator),
})
}
}
}
var baseMetricV2 *schema.NVDCVEFeedJSON10DefImpactBaseMetricV2
for _, cvssMetricV2 := range cve.CVE.Metrics.CVSSMetricV2 {
if cvssMetricV2.Type != "Primary" {
continue
}
baseMetricV2 = &schema.NVDCVEFeedJSON10DefImpactBaseMetricV2{
AcInsufInfo: *cvssMetricV2.ACInsufInfo,
CVSSV2: &schema.CVSSV20{
AccessComplexity: derefPtr(cvssMetricV2.CVSSData.AccessComplexity),
AccessVector: derefPtr(cvssMetricV2.CVSSData.AccessVector),
Authentication: derefPtr(cvssMetricV2.CVSSData.Authentication),
AvailabilityImpact: derefPtr(cvssMetricV2.CVSSData.AvailabilityImpact),
AvailabilityRequirement: derefPtr(cvssMetricV2.CVSSData.AvailabilityRequirement),
BaseScore: cvssMetricV2.CVSSData.BaseScore,
CollateralDamagePotential: derefPtr(cvssMetricV2.CVSSData.CollateralDamagePotential),
ConfidentialityImpact: derefPtr(cvssMetricV2.CVSSData.ConfidentialityImpact),
ConfidentialityRequirement: derefPtr(cvssMetricV2.CVSSData.ConfidentialityRequirement),
EnvironmentalScore: derefPtr(cvssMetricV2.CVSSData.EnvironmentalScore),
Exploitability: derefPtr(cvssMetricV2.CVSSData.Exploitability),
IntegrityImpact: derefPtr(cvssMetricV2.CVSSData.IntegrityImpact),
IntegrityRequirement: derefPtr(cvssMetricV2.CVSSData.IntegrityRequirement),
RemediationLevel: derefPtr(cvssMetricV2.CVSSData.RemediationLevel),
ReportConfidence: derefPtr(cvssMetricV2.CVSSData.ReportConfidence),
TargetDistribution: derefPtr(cvssMetricV2.CVSSData.TargetDistribution),
TemporalScore: derefPtr(cvssMetricV2.CVSSData.TemporalScore),
VectorString: cvssMetricV2.CVSSData.VectorString,
Version: cvssMetricV2.CVSSData.Version,
},
ExploitabilityScore: derefPtr((*float64)(cvssMetricV2.ExploitabilityScore)),
ImpactScore: derefPtr((*float64)(cvssMetricV2.ImpactScore)),
ObtainAllPrivilege: derefPtr(cvssMetricV2.ObtainAllPrivilege),
ObtainOtherPrivilege: derefPtr(cvssMetricV2.ObtainOtherPrivilege),
ObtainUserPrivilege: derefPtr(cvssMetricV2.ObtainUserPrivilege),
Severity: derefPtr(cvssMetricV2.BaseSeverity),
UserInteractionRequired: derefPtr(cvssMetricV2.UserInteractionRequired),
}
}
var baseMetricV3 *schema.NVDCVEFeedJSON10DefImpactBaseMetricV3
for _, cvssMetricV30 := range cve.CVE.Metrics.CVSSMetricV30 {
if cvssMetricV30.Type != "Primary" {
continue
}
baseMetricV3 = &schema.NVDCVEFeedJSON10DefImpactBaseMetricV3{
CVSSV3: &schema.CVSSV30{
AttackComplexity: derefPtr(cvssMetricV30.CVSSData.AttackComplexity),
AttackVector: derefPtr(cvssMetricV30.CVSSData.AttackVector),
AvailabilityImpact: derefPtr(cvssMetricV30.CVSSData.AvailabilityImpact),
AvailabilityRequirement: derefPtr(cvssMetricV30.CVSSData.AvailabilityRequirement),
BaseScore: cvssMetricV30.CVSSData.BaseScore,
BaseSeverity: cvssMetricV30.CVSSData.BaseSeverity,
ConfidentialityImpact: derefPtr(cvssMetricV30.CVSSData.ConfidentialityImpact),
ConfidentialityRequirement: derefPtr(cvssMetricV30.CVSSData.ConfidentialityRequirement),
EnvironmentalScore: derefPtr(cvssMetricV30.CVSSData.EnvironmentalScore),
EnvironmentalSeverity: derefPtr(cvssMetricV30.CVSSData.EnvironmentalSeverity),
ExploitCodeMaturity: derefPtr(cvssMetricV30.CVSSData.ExploitCodeMaturity),
IntegrityImpact: derefPtr(cvssMetricV30.CVSSData.IntegrityImpact),
IntegrityRequirement: derefPtr(cvssMetricV30.CVSSData.IntegrityRequirement),
ModifiedAttackComplexity: derefPtr(cvssMetricV30.CVSSData.ModifiedAttackComplexity),
ModifiedAttackVector: derefPtr(cvssMetricV30.CVSSData.ModifiedAttackVector),
ModifiedAvailabilityImpact: derefPtr(cvssMetricV30.CVSSData.ModifiedAvailabilityImpact),
ModifiedConfidentialityImpact: derefPtr(cvssMetricV30.CVSSData.ModifiedConfidentialityImpact),
ModifiedIntegrityImpact: derefPtr(cvssMetricV30.CVSSData.ModifiedIntegrityImpact),
ModifiedPrivilegesRequired: derefPtr(cvssMetricV30.CVSSData.ModifiedPrivilegesRequired),
ModifiedScope: derefPtr(cvssMetricV30.CVSSData.ModifiedScope),
ModifiedUserInteraction: derefPtr(cvssMetricV30.CVSSData.ModifiedUserInteraction),
PrivilegesRequired: derefPtr(cvssMetricV30.CVSSData.PrivilegesRequired),
RemediationLevel: derefPtr(cvssMetricV30.CVSSData.RemediationLevel),
ReportConfidence: derefPtr(cvssMetricV30.CVSSData.ReportConfidence),
Scope: derefPtr(cvssMetricV30.CVSSData.Scope),
TemporalScore: derefPtr(cvssMetricV30.CVSSData.TemporalScore),
TemporalSeverity: derefPtr(cvssMetricV30.CVSSData.TemporalSeverity),
UserInteraction: derefPtr(cvssMetricV30.CVSSData.UserInteraction),
VectorString: cvssMetricV30.CVSSData.VectorString,
Version: cvssMetricV30.CVSSData.Version,
},
ExploitabilityScore: derefPtr((*float64)(cvssMetricV30.ExploitabilityScore)),
ImpactScore: derefPtr((*float64)(cvssMetricV30.ImpactScore)),
}
}
// Use CVSSMetricV31 if available (override CVSSMetricV30)
for _, cvssMetricV31 := range cve.CVE.Metrics.CVSSMetricV31 {
if cvssMetricV31.Type != "Primary" {
continue
}
baseMetricV3 = &schema.NVDCVEFeedJSON10DefImpactBaseMetricV3{
CVSSV3: &schema.CVSSV30{
AttackComplexity: derefPtr(cvssMetricV31.CVSSData.AttackComplexity),
AttackVector: derefPtr(cvssMetricV31.CVSSData.AttackVector),
AvailabilityImpact: derefPtr(cvssMetricV31.CVSSData.AvailabilityImpact),
AvailabilityRequirement: derefPtr(cvssMetricV31.CVSSData.AvailabilityRequirement),
BaseScore: cvssMetricV31.CVSSData.BaseScore,
BaseSeverity: cvssMetricV31.CVSSData.BaseSeverity,
ConfidentialityImpact: derefPtr(cvssMetricV31.CVSSData.ConfidentialityImpact),
ConfidentialityRequirement: derefPtr(cvssMetricV31.CVSSData.ConfidentialityRequirement),
EnvironmentalScore: derefPtr(cvssMetricV31.CVSSData.EnvironmentalScore),
EnvironmentalSeverity: derefPtr(cvssMetricV31.CVSSData.EnvironmentalSeverity),
ExploitCodeMaturity: derefPtr(cvssMetricV31.CVSSData.ExploitCodeMaturity),
IntegrityImpact: derefPtr(cvssMetricV31.CVSSData.IntegrityImpact),
IntegrityRequirement: derefPtr(cvssMetricV31.CVSSData.IntegrityRequirement),
ModifiedAttackComplexity: derefPtr(cvssMetricV31.CVSSData.ModifiedAttackComplexity),
ModifiedAttackVector: derefPtr(cvssMetricV31.CVSSData.ModifiedAttackVector),
ModifiedAvailabilityImpact: derefPtr(cvssMetricV31.CVSSData.ModifiedAvailabilityImpact),
ModifiedConfidentialityImpact: derefPtr(cvssMetricV31.CVSSData.ModifiedConfidentialityImpact),
ModifiedIntegrityImpact: derefPtr(cvssMetricV31.CVSSData.ModifiedIntegrityImpact),
ModifiedPrivilegesRequired: derefPtr(cvssMetricV31.CVSSData.ModifiedPrivilegesRequired),
ModifiedScope: derefPtr(cvssMetricV31.CVSSData.ModifiedScope),
ModifiedUserInteraction: derefPtr(cvssMetricV31.CVSSData.ModifiedUserInteraction),
PrivilegesRequired: derefPtr(cvssMetricV31.CVSSData.PrivilegesRequired),
RemediationLevel: derefPtr(cvssMetricV31.CVSSData.RemediationLevel),
ReportConfidence: derefPtr(cvssMetricV31.CVSSData.ReportConfidence),
Scope: derefPtr(cvssMetricV31.CVSSData.Scope),
TemporalScore: derefPtr(cvssMetricV31.CVSSData.TemporalScore),
TemporalSeverity: derefPtr(cvssMetricV31.CVSSData.TemporalSeverity),
UserInteraction: derefPtr(cvssMetricV31.CVSSData.UserInteraction),
VectorString: cvssMetricV31.CVSSData.VectorString,
Version: cvssMetricV31.CVSSData.Version,
},
ExploitabilityScore: derefPtr((*float64)(cvssMetricV31.ExploitabilityScore)),
ImpactScore: derefPtr((*float64)(cvssMetricV31.ImpactScore)),
}
}
lastModified, err := convertAPI20TimeToLegacy(cve.CVE.LastModified)
if err != nil {
logger.Log("msg", "failed to parse lastModified time", "err", err)
}
publishedDate, err := convertAPI20TimeToLegacy(cve.CVE.Published)
if err != nil {
logger.Log("msg", "failed to parse published time", "err", err)
}
return &schema.NVDCVEFeedJSON10DefCVEItem{
CVE: &schema.CVEJSON40{
Affects: nil, // Doesn't seem used.
CVEDataMeta: &schema.CVEJSON40CVEDataMeta{
ID: *cve.CVE.ID,
ASSIGNER: derefPtr(cve.CVE.SourceIdentifier),
STATE: "", // Doesn't seem used.
},
DataFormat: "MITRE", // All entries seem to have this format string.
DataType: "CVE", // All entries seem to have this type string.
DataVersion: "4.0", // All entries seem to have this version string.
Description: &schema.CVEJSON40Description{
DescriptionData: descriptions,
},
Problemtype: &schema.CVEJSON40Problemtype{
ProblemtypeData: problemtypeData,
},
References: &schema.CVEJSON40References{
ReferenceData: referenceData,
},
},
Configurations: &schema.NVDCVEFeedJSON10DefConfigurations{
CVEDataVersion: "4.0", // All entries seem to have this version string.
Nodes: nodes,
},
Impact: &schema.NVDCVEFeedJSON10DefImpact{
BaseMetricV2: baseMetricV2,
BaseMetricV3: baseMetricV3,
},
LastModifiedDate: lastModified,
PublishedDate: publishedDate,
}
}
// convertAPI20TimeToLegacy rewrites an NVD API 2.0 timestamp using the layout of the
// legacy JSON feeds (minute precision followed by a literal "Z").
//
// A nil input produces an empty string and no error. An input that cannot be parsed
// produces an empty string and the parsing error.
func convertAPI20TimeToLegacy(t *string) (string, error) {
	const (
		inputLayout  = "2006-01-02T15:04:05"
		outputLayout = "2006-01-02T15:04Z"
	)
	if t == nil {
		return "", nil
	}
	parsed, err := time.Parse(inputLayout, *t)
	if err != nil {
		return "", err
	}
	return parsed.Format(outputLayout), nil
}

View File

@ -0,0 +1,142 @@
package nvdsync
import (
"bytes"
"compress/gzip"
"crypto/sha256"
"encoding/json"
"fmt"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"testing"
"github.com/facebookincubator/nvdtools/cvefeed/nvd/schema"
"github.com/go-kit/log"
"github.com/google/go-cmp/cmp"
"github.com/pandatix/nvdapi/v2"
"github.com/stretchr/testify/require"
)
// Fixture directories used by the tests in this file. Each one is joined with a
// "<year>.json.gz" file name before being loaded.
var (
// legacyCVEFeedsDir holds the CVEs in the legacy feed format.
legacyCVEFeedsDir = filepath.Join("testdata", "cve", "legacy_feeds")
// api20CVEDir holds the CVEs as returned by the NVD API 2.0.
api20CVEDir = filepath.Join("testdata", "cve", "api_2.0")
)
// TestStoreCVEsLegacyFormat checks that CVEs fetched with the NVD API 2.0 and converted
// with convertAPI20CVEToLegacy match the CVEs of the legacy feed for the same year.
//
// The fixtures are loaded from legacyCVEFeedsDir and api20CVEDir. The test requires that
// more than 99% of the API 2.0 CVEs are equal to their legacy counterpart according to
// compareVulnerabilities. CVEs that are missing from the legacy feed count as not matched.
func TestStoreCVEsLegacyFormat(t *testing.T) {
	year := 2023
	t.Run(fmt.Sprintf("%d", year), func(t *testing.T) {
		// Load CVEs from legacy feed.
		legacyCVEFilePath := filepath.Join(legacyCVEFeedsDir, fmt.Sprintf("%d.json.gz", year))
		var legacyCVEs schema.NVDCVEFeedJSON10
		loadJSONGz(t, legacyCVEFilePath, &legacyCVEs)

		// Load CVEs from new API 2.0 feed.
		api20CVEFilePath := filepath.Join(api20CVEDir, fmt.Sprintf("%d.json.gz", year))
		var api20CVEs []nvdapi.CVEItem
		loadJSONGz(t, api20CVEFilePath, &api20CVEs)
		// An empty fixture would turn the match rate below into NaN (0/0).
		require.NotEmpty(t, api20CVEs)

		// Setup map of legacy CVEs.
		legacyVulns := make(map[string]schema.NVDCVEFeedJSON10DefCVEItem) // key is the CVE ID.
		for _, legacyVuln := range legacyCVEs.CVEItems {
			legacyVulns[legacyVuln.CVE.CVEDataMeta.ID] = *legacyVuln
		}

		// Convert CVEs fetched using the new API 2.0 to the old legacy feeds format
		// and compare them with the corresponding fetched legacy CVE.
		var (
			vulnsNotFoundInLegacy []string
			mismatched            []string
			matched               = 0
		)
		for _, api20Vuln := range api20CVEs {
			convertedLegacyVuln := convertAPI20CVEToLegacy(api20Vuln, log.NewNopLogger())
			legacyVuln, ok := legacyVulns[*api20Vuln.CVE.ID]
			if !ok {
				vulnsNotFoundInLegacy = append(vulnsNotFoundInLegacy, *api20Vuln.CVE.ID)
				continue
			}
			if compareVulnerabilities(legacyVuln, *convertedLegacyVuln) {
				matched++
			} else {
				mismatched = append(mismatched, *api20Vuln.CVE.ID)
			}
		}

		matchRate := float64(matched) / float64(len(api20CVEs))

		// Log the details before asserting: a failed require stops the test right away,
		// so anything logged after it would be lost exactly when it is needed.
		t.Logf("%d: CVEs count: %d, match count: %d, match rate: %f", year, len(api20CVEs), matched, matchRate)
		// TODO(lucas): Review these CVEs to check they are a-ok to be skipped.
		t.Logf("%d: Vulnerabilities not found in legacy store: %s", year, strings.Join(vulnsNotFoundInLegacy, ", "))
		t.Logf("%d: Vulnerabilities that mismatch from legacy store: %s", year, strings.Join(mismatched, ", "))

		require.Greater(t, matchRate, .99)
	})
}
// compareVulnerabilities reports whether v1 and v2 are equal once the fields that are
// known to differ between the legacy feed and the converted API 2.0 data are normalized.
//
// Normalization sorts the references by URL, sorts the configuration nodes (sortChildren),
// blanks the Refsource of every reference, sets the Name of every reference to its URL,
// blanks the assigner and drops the problem type. The normalization is applied through
// the pointers held by both items, so the data they point to is modified.
func compareVulnerabilities(v1 schema.NVDCVEFeedJSON10DefCVEItem, v2 schema.NVDCVEFeedJSON10DefCVEItem) bool {
	for _, item := range []*schema.NVDCVEFeedJSON10DefCVEItem{&v1, &v2} {
		refs := item.CVE.References.ReferenceData
		sort.Slice(refs, func(a, b int) bool {
			return refs[a].URL < refs[b].URL
		})
		sortChildren(item.Configurations.Nodes)

		for _, ref := range refs {
			ref.Refsource = ""
			ref.Name = ref.URL
		}

		// These fields mostly match, but sometimes differ.
		item.CVE.CVEDataMeta.ASSIGNER = ""
		item.CVE.Problemtype = nil
	}
	return cmp.Equal(v1, v2)
}
// loadJSONGz reads the gzip-compressed JSON file at path and decodes it into v.
//
// v must be a pointer accepted by encoding/json. Any failure (reading the file, opening
// the gzip stream, decoding the JSON or closing the gzip reader) fails the test immediately.
func loadJSONGz(t *testing.T, path string, v any) {
	// Mark as helper so that failures are reported at the caller's line.
	t.Helper()

	compressed, err := os.ReadFile(path)
	require.NoError(t, err)
	gzipReader, err := gzip.NewReader(bytes.NewReader(compressed))
	require.NoError(t, err)
	err = json.NewDecoder(gzipReader).Decode(v)
	require.NoError(t, err)
	require.NoError(t, gzipReader.Close())
}
// cpeMatchHash returns the raw SHA-256 digest (as a string) of the concatenation of the
// CPE match fields: CPE 2.3 URI, the four version bounds, the vulnerable flag and the
// CPE 2.2 URI. It is used as a sort key by sortChildren.
func cpeMatchHash(v schema.NVDCVEFeedJSON10DefCPEMatch) string {
	fields := []string{
		v.Cpe23Uri,
		v.VersionEndExcluding,
		v.VersionEndIncluding,
		v.VersionStartExcluding,
		v.VersionStartIncluding,
		strconv.FormatBool(v.Vulnerable),
		v.Cpe22Uri,
	}
	// Fields are joined without a separator.
	digest := sha256.Sum256([]byte(strings.Join(fields, "")))
	return string(digest[:])
}
// childrenHash returns the raw SHA-256 digest (as a string) of a configuration node.
// The hashed input is, in order: the hash of every CPE match of the node, the hash of
// every child node (computed recursively), the operator and the negate flag.
func childrenHash(v schema.NVDCVEFeedJSON10DefNode) string {
	var input strings.Builder
	for i := range v.CPEMatch {
		input.WriteString(cpeMatchHash(*v.CPEMatch[i]))
	}
	for i := range v.Children {
		input.WriteString(childrenHash(*v.Children[i]))
	}
	input.WriteString(v.Operator)
	input.WriteString(strconv.FormatBool(v.Negate))

	digest := sha256.Sum256([]byte(input.String()))
	return string(digest[:])
}
// sortChildren sorts a tree of configuration nodes in place.
// For every node it sorts the CPE matches by cpeMatchHash and recurses into the node's
// children; once all nodes are processed it sorts the nodes themselves by childrenHash.
func sortChildren(children []*schema.NVDCVEFeedJSON10DefNode) {
	for i := range children {
		matches := children[i].CPEMatch
		sort.Slice(matches, func(a, b int) bool {
			return cpeMatchHash(*matches[a]) < cpeMatchHash(*matches[b])
		})
		sortChildren(children[i].Children)
	}
	// Nodes are ordered only after their contents are, since childrenHash depends on them.
	sort.Slice(children, func(a, b int) bool {
		return childrenHash(*children[a]) < childrenHash(*children[b])
	})
}

Binary file not shown.

View File

@ -0,0 +1,50 @@
# nvdvuln
This tool can be used to reproduce false positive/negative vulnerabilities found by Fleet.
The tool has two modes of operation:
1. Run vulnerability processing using the NVD dataset on a specific software item. The software item must be specified to the tool using the same field values that are stored in Fleet's `software` MySQL table.
2. Fetch software from a Fleet instance (and their found vulnerabilities), then, run vulnerability processing on such software and report any differences in CVEs against the Fleet instance. This mode of operation is useful to test new changes to the vulnerability processing.
PS: This tool is only useful on systems and software where the NVD dataset is used to detect vulnerabilities. For instance, this tool should not be used with Microsoft Office applications for macOS because Fleet uses a different dataset to detect vulnerabilities on such applications.
## Example Mode 1
```sh
go run -tags fts5 ./tools/nvd/nvdvuln/nvdvuln.go \
-software_name Python.app \
-software_version 3.7.3 \
-software_source apps \
-software_bundle_identifier com.apple.python3 \
-sync \
-db_dir /tmp/vulndbtest
[...]
CVEs found for Python.app (3.7.3): CVE-2007-4559, CVE-2019-10160, CVE-2019-15903, CVE-2022-0391,
CVE-2020-14422, CVE-2020-10735, CVE-2023-40217, CVE-2015-20107, CVE-2016-3189, CVE-2018-25032,
CVE-2019-20907, CVE-2019-9740, CVE-2020-8315, CVE-2019-16056, CVE-2021-3177, CVE-2021-23336,
CVE-2022-48560, CVE-2022-45061, CVE-2019-18348, CVE-2019-16935, CVE-2019-9947, CVE-2021-4189,
CVE-2021-3426, CVE-2022-48566, CVE-2021-3733, CVE-2022-48564, CVE-2023-24329, CVE-2023-27043,
CVE-2019-12900, CVE-2021-28861, CVE-2023-36632, CVE-2022-48565, CVE-2019-9948, CVE-2020-8492,
CVE-2020-27619, CVE-2020-26116, CVE-2021-3737, CVE-2022-37454
```
## Example Mode 2
```sh
go run -tags fts5 ./tools/nvd/nvdvuln/nvdvuln.go \
-debug \
-sync \
-db_dir /tmp/vulndbtest \
-software_from_url https://fleet.example.com \
-software_from_api_token <...>
```
## CPU and memory usage
> Requirement: gnuplot (`brew install gnuplot`)
When run with `-debug`, the `nvdvuln` tool samples its CPU and memory usage every second and stores the samples in a `cpu_and_mem.dat` file under the `-db_dir` directory.
Such data can be visualized with the following command:
```sh
./tools/nvd/nvdvuln/gnuplot.sh /path/to/db/directory
```

32
tools/nvd/nvdvuln/gnuplot.sh Executable file
View File

@ -0,0 +1,32 @@
#!/bin/bash

# Renders the CPU and memory samples stored in <db_dir>/cpu_and_mem.dat
# as a JPEG chart (<db_dir>/cpu_and_mem.jpg) and opens the chart.
#
# Usage: gnuplot.sh <db_dir>

set -euo pipefail

if [ "$#" -ne 1 ] || [ -z "$1" ]; then
    echo "Usage: $0 <db_dir>" >&2
    exit 1
fi

DB_DIR=$1

# Feed the gnuplot commands directly through a here-document so that no
# temporary file is left behind in the current directory if gnuplot fails.
gnuplot <<EOF
set xdata time
set timefmt "%H:%M:%S"
set format x "%H:%M"
set xtics rotate by -45
set terminal jpeg

set ylabel '% CPU'
set yrange [0:800]
set ytics nomirror

set y2label 'Memory (MB)'
set y2range [0:5000]
set y2tics 0, 500

set output '$DB_DIR/cpu_and_mem.jpg'

plot '$DB_DIR/cpu_and_mem.dat' using 1:2 axis x1y1 with linespoints linetype -1 linecolor rgb 'blue' linewidth 1 title '% CPU', \
'$DB_DIR/cpu_and_mem.dat' using 1:3 axis x1y2 with linespoints linetype -1 linecolor rgb 'red' linewidth 1 title 'Memory (MB)'
EOF

# Quoted so that directories containing spaces work.
open "$DB_DIR/cpu_and_mem.jpg"

View File

@ -0,0 +1,329 @@
package main
import (
"context"
"flag"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"time"
"github.com/fleetdm/fleet/v4/server/fleet"
"github.com/fleetdm/fleet/v4/server/mock"
"github.com/fleetdm/fleet/v4/server/service"
"github.com/fleetdm/fleet/v4/server/vulnerabilities/nvd"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/google/go-cmp/cmp"
"github.com/shirou/gopsutil/v3/process"
)
// main is the entry point of the nvdvuln tool.
//
// Depending on the flags it:
//   - synchronizes the vulnerability database into -db_dir (-sync),
//   - runs vulnerability processing for a single software item described by the
//     -software_* flags and prints the CVEs found, or
//   - fetches the software (and its known vulnerabilities) from a Fleet instance
//     (-software_from_url, -software_from_api_token), runs vulnerability processing on
//     it and prints the differences between the CVEs reported by the instance and the
//     CVEs found by this run.
//
// With -debug it also samples the CPU and memory usage of the process once per second
// into "cpu_and_mem.dat" under -db_dir.
//
// Invalid flag combinations print a message and return; unexpected errors panic.
func main() {
	sync := flag.Bool("sync", false, "If set, it will synchronize the vulnerability database before running vulnerability processing")
	dbDir := flag.String("db_dir", "/tmp/vulndbs", "Path to the vulnerability database")
	debug := flag.Bool("debug", false, "Sets debug mode")

	// Fields that allow setting a specific software.
	softwareName := flag.String("software_name", "", "Name of the software as ingested by Fleet")
	softwareVersion := flag.String("software_version", "", "Version of the software as ingested by Fleet")
	softwareSource := flag.String("software_source", "", "Source for this software (e.g. 'apps' for macOS applications)")
	softwareBundleIdentifier := flag.String("software_bundle_identifier", "", "Bundle identifier of the software as ingested by Fleet (for macOS apps only)")

	// Fields to fetch software (and the found vulnerabilities) from a Fleet instance.
	// This mode of operation then compares the CVEs found by the Fleet instance with the CVEs found by this new run of vulnerability processing.
	softwareFromURL := flag.String("software_from_url", "", "URL to get software from")
	softwareFromAPIToken := flag.String("software_from_api_token", "", "API token to authenticate to get the software")

	flag.Parse()

	singleSoftwareSet := *softwareName != ""
	softwareFromURLSet := *softwareFromURL != ""

	if !*sync && !singleSoftwareSet && !softwareFromURLSet {
		printf("Must either set --sync, --software_name or --software_from_url\n")
		return
	}

	if singleSoftwareSet && softwareFromURLSet {
		printf("Cannot set both --software_name and --software_from_url\n")
		return
	}

	if singleSoftwareSet {
		if *softwareVersion == "" {
			printf("Must set --software_version\n")
			return
		}
		if *softwareSource == "" {
			printf("Must set --software_source\n")
			return
		}
	}

	if softwareFromURLSet {
		if *softwareFromAPIToken == "" {
			printf("Must set --software_from_api_token\n")
			return
		}
	}

	if err := os.MkdirAll(*dbDir, os.ModePerm); err != nil {
		panic(err)
	}

	if *debug {
		// Sample the process CPU and memory usage every second
		// and store it on a file under the dbDir.
		process, err := process.NewProcess(int32(os.Getpid()))
		if err != nil {
			panic(err)
		}
		cpuAndMemFile, err := os.Create(filepath.Join(*dbDir, "cpu_and_mem.dat"))
		if err != nil {
			panic(err)
		}
		defer cpuAndMemFile.Close()
		go func() {
			// A ticker gives a steady one second period without allocating
			// a new timer on every iteration.
			ticker := time.NewTicker(1 * time.Second)
			defer ticker.Stop()
			for range ticker.C {
				cpuPercent, err := process.CPUPercent()
				if err != nil {
					panic(err)
				}
				memInfo, err := process.MemoryInfo()
				if err != nil {
					panic(err)
				}
				now := time.Now().UTC().Format("15:04:05")
				// Columns: time of day, CPU percentage, resident memory in MB.
				fmt.Fprintf(cpuAndMemFile, "%s %.2f %.2f\n", now, cpuPercent, float64(memInfo.RSS)/1024.0/1024.0)
			}
		}()
	}

	logger := log.NewJSONLogger(os.Stdout)
	logger = log.With(logger, "ts", log.DefaultTimestampUTC)
	if *debug {
		logger = level.NewFilter(logger, level.AllowDebug())
	} else {
		logger = level.NewFilter(logger, level.AllowInfo())
	}

	if *sync {
		printf("Syncing into %s...\n", *dbDir)
		if err := vulnDBSync(*dbDir, *debug, logger); err != nil {
			panic(err)
		}
		// Sync-only invocation: nothing else to process.
		if !singleSoftwareSet && !softwareFromURLSet {
			return
		}
	}

	ctx := context.Background()

	var software []fleet.Software
	if singleSoftwareSet {
		software = []fleet.Software{
			{
				Name:             *softwareName,
				Version:          *softwareVersion,
				Source:           *softwareSource,
				BundleIdentifier: *softwareBundleIdentifier,
			},
		}
	} else { // softwareFromURLSet
		software = getSoftwareFromURL(*softwareFromURL, *softwareFromAPIToken, *debug)
		if *debug {
			printf("Retrieved software:\n")
			for _, s := range software {
				printf("%+v\n", s)
			}
		}
		// Set CPE to empty to trigger CPE matching.
		for i := range software {
			software[i].GenerateCPE = ""
		}
	}

	// The datastore is a mock that serves the software gathered above and keeps
	// the matched CPEs in memory.
	ds := new(mock.Store)

	ds.AllSoftwareIteratorFunc = func(ctx context.Context, query fleet.SoftwareIterQueryOptions) (fleet.SoftwareIterator, error) {
		return &softwareIterator{software: software}, nil
	}
	var softwareCPEs []fleet.SoftwareCPE
	ds.UpsertSoftwareCPEsFunc = func(ctx context.Context, cpes []fleet.SoftwareCPE) (int64, error) {
		// Accumulate the CPEs across calls, skipping the ones already stored.
		for _, cpe := range cpes {
			var found bool
			for _, storedCPEs := range softwareCPEs {
				if storedCPEs == cpe {
					found = true
					break
				}
			}
			if !found {
				softwareCPEs = append(softwareCPEs, cpe)
			}
		}
		if singleSoftwareSet || *debug {
			for _, cpe := range cpes {
				printf("Matched CPE: %d: %s\n", cpe.SoftwareID, cpe.CPE)
			}
		}
		return int64(len(cpes)), nil
	}
	ds.ListSoftwareCPEsFunc = func(ctx context.Context) ([]fleet.SoftwareCPE, error) {
		return softwareCPEs, nil
	}
	ds.InsertSoftwareVulnerabilityFunc = func(ctx context.Context, vuln fleet.SoftwareVulnerability, source fleet.VulnerabilitySource) (bool, error) {
		return true, nil
	}
	ds.DeleteOutOfDateVulnerabilitiesFunc = func(ctx context.Context, source fleet.VulnerabilitySource, duration time.Duration) error {
		return nil
	}

	printf("Translating software to CPE...\n")
	err := nvd.TranslateSoftwareToCPE(ctx, ds, *dbDir, logger)
	if err != nil {
		panic(err)
	}
	if len(softwareCPEs) == 0 {
		printf("Unable to match a CPE for the software...\n")
		return
	}

	printf("Translating CPEs to CVEs...\n")
	vulns, err := nvd.TranslateCPEToCVE(ctx, ds, *dbDir, logger, true, 1*time.Hour)
	if err != nil {
		panic(err)
	}

	if singleSoftwareSet {
		var cves []string
		for _, vuln := range vulns {
			cves = append(cves, vuln.CVE)
		}
		printf("CVEs found for %s (%s): %s\n", *softwareName, *softwareVersion, strings.Join(cves, ", "))
	} else { // softwareFromURLSet
		// CVEs reported by the Fleet instance, keyed by software ID.
		// Software without vulnerabilities is left out of the map.
		expectedSoftwareMap := make(map[uint][]string)
		for _, s := range software {
			var vulnerabilities []string
			for _, vulnerability := range s.Vulnerabilities {
				vulnerabilities = append(vulnerabilities, vulnerability.CVE)
			}
			if len(vulnerabilities) == 0 {
				continue
			}
			sort.Strings(vulnerabilities)
			expectedSoftwareMap[s.ID] = vulnerabilities
		}

		// CVEs found by this run, keyed by software ID.
		foundSoftwareCVEs := make(map[uint][]string)
		for _, vuln := range vulns {
			foundSoftwareCVEs[vuln.SoftwareID] = append(foundSoftwareCVEs[vuln.SoftwareID], vuln.CVE)
		}
		for softwareID := range foundSoftwareCVEs {
			sort.Strings(foundSoftwareCVEs[softwareID])
		}
		if *debug {
			printf("Found vulnerabilities:\n")
			for softwareID, cves := range foundSoftwareCVEs {
				printf("%s (%d): %s\n", getSoftwareName(software, softwareID), softwareID, cves)
			}
		}
		if cmp.Equal(expectedSoftwareMap, foundSoftwareCVEs) {
			printf("CVEs found and expected matched!\n")
			return
		}
		for s, expectedVulns := range expectedSoftwareMap {
			if vulnsFound, ok := foundSoftwareCVEs[s]; !ok || !cmp.Equal(expectedVulns, vulnsFound) {
				printf("Mismatched software %s (%d): expected=%+v vs found=%+v\n", getSoftwareName(software, s), s, expectedVulns, vulnsFound)
				// Already reported: remove it so that the loop below does not report it again.
				if ok {
					delete(foundSoftwareCVEs, s)
				}
			}
		}
		for s, vulnsFound := range foundSoftwareCVEs {
			if expectedVulns, ok := expectedSoftwareMap[s]; !ok || !cmp.Equal(expectedVulns, vulnsFound) {
				printf("Mismatched software %s (%d): expected=%+v vs found=%+v\n", getSoftwareName(software, s), s, expectedVulns, vulnsFound)
			}
		}
	}
}
// getSoftwareName returns "<name>:<version>" for the first item in software whose ID
// is softwareID. It panics if no item has that ID.
func getSoftwareName(software []fleet.Software, softwareID uint) string {
	for idx := range software {
		if item := &software[idx]; item.ID == softwareID {
			return item.Name + ":" + item.Version
		}
	}
	panic(fmt.Sprintf("software %d not found", softwareID))
}
// softwareIterator iterates over an in-memory list of software.
// main returns it from the mocked AllSoftwareIteratorFunc as a fleet.SoftwareIterator.
type softwareIterator struct {
// software is the list of items to iterate over.
software []fleet.Software
// i is the index of the item that the next call to Value returns.
i int
}
// Next reports whether there is an item left to be returned by Value.
// It does not advance the iterator.
func (s *softwareIterator) Next() bool {
	return s.i < len(s.software)
}
// Value returns a pointer to the current item and advances the iterator to the next one.
// The returned error is always nil. It must only be called after Next returned true,
// otherwise the slice access panics.
func (s *softwareIterator) Value() (*fleet.Software, error) {
	current := &s.software[s.i]
	s.i++
	return current, nil
}
// Err always returns nil; this iterator never reports an error.
func (s *softwareIterator) Err() error {
return nil
}
// Close does nothing and always returns nil.
func (s *softwareIterator) Close() error {
return nil
}
// vulnDBSync runs nvd.Sync with vulnDBDir as the vulnerability path and the given
// debug setting, and returns the error reported by nvd.Sync (nil on success).
func vulnDBSync(vulnDBDir string, debug bool, logger log.Logger) error {
	return nvd.Sync(nvd.SyncOptions{
		VulnPath: vulnDBDir,
		Debug:    debug,
	}, logger)
}
// getSoftwareFromURL lists the software of the Fleet instance at url, authenticating
// with apiToken, and returns it without the items whose source is "deb_packages" or
// "rpm_packages". When debug is set the API client is created with debugging enabled.
// It panics if the client cannot be created or the software cannot be listed.
func getSoftwareFromURL(url, apiToken string, debug bool) []fleet.Software {
	var opts []service.ClientOption
	if debug {
		opts = append(opts, service.EnableClientDebug())
	}

	client, err := service.NewClient(url, true, "", "", opts...)
	if err != nil {
		panic(err)
	}
	client.SetToken(apiToken)

	all, err := client.ListSoftware("")
	if err != nil {
		panic(err)
	}

	var kept []fleet.Software
	for _, item := range all {
		switch item.Source {
		case "deb_packages", "rpm_packages":
			// Skipped sources.
		default:
			kept = append(kept, item)
		}
	}
	return kept
}
// printf works like fmt.Printf but prefixes the output with the current UTC time
// followed by ": ".
func printf(format string, a ...any) {
	timestamp := time.Now().UTC().Format("2006-01-02T15:04:05Z")
	fmt.Printf(timestamp+": "+format, a...)
}

View File

@ -1,28 +0,0 @@
# nvdvuln
This tool can be used to reproduce false positive vulnerabilities found by Fleet.
The tool allows you to run vulnerability processing using the NVD dataset on a specific software item.
Such software item should be specified to the tool with the fields as stored in Fleet's `software` MySQL table.
PS: This tool is only useful on systems and software where the NVD dataset is used to detect vulnerabilities. For instance, this tool should not be used with Microsoft Office applications for macOS because Fleet uses a different dataset to detect vulnerabilities on such applications.
## Example
```sh
go run -tags fts5 ./tools/nvdvuln \
-software_name Python.app \
-software_version 3.7.3 \
-software_source apps \
-software_bundle_identifier com.apple.python3 \
-sync \
-vuln_db_dir /tmp/vulndbtest
[...]
CVEs found for Python.app (3.7.3): CVE-2007-4559, CVE-2019-10160, CVE-2019-15903, CVE-2022-0391,
CVE-2020-14422, CVE-2020-10735, CVE-2023-40217, CVE-2015-20107, CVE-2016-3189, CVE-2018-25032,
CVE-2019-20907, CVE-2019-9740, CVE-2020-8315, CVE-2019-16056, CVE-2021-3177, CVE-2021-23336,
CVE-2022-48560, CVE-2022-45061, CVE-2019-18348, CVE-2019-16935, CVE-2019-9947, CVE-2021-4189,
CVE-2021-3426, CVE-2022-48566, CVE-2021-3733, CVE-2022-48564, CVE-2023-24329, CVE-2023-27043,
CVE-2019-12900, CVE-2021-28861, CVE-2023-36632, CVE-2022-48565, CVE-2019-9948, CVE-2020-8492,
CVE-2020-27619, CVE-2020-26116, CVE-2021-3737, CVE-2022-37454
```

View File

@ -1,144 +0,0 @@
package main
import (
"context"
"flag"
"fmt"
"os"
"strings"
"time"
"github.com/fleetdm/fleet/v4/server/fleet"
"github.com/fleetdm/fleet/v4/server/mock"
"github.com/fleetdm/fleet/v4/server/vulnerabilities/nvd"
"github.com/go-kit/log"
)
// main runs vulnerability processing for the single software item described by the
// -software_* flags and prints the CVEs found for it.
// With -sync it first synchronizes the vulnerability database into -vuln_db_dir.
// Missing mandatory flags print a message and return; unexpected errors panic.
func main() {
sync := flag.Bool("sync", false, "...")
vulnDBDir := flag.String("vuln_db_dir", "/tmp/vulndbs", "...")
softwareName := flag.String("software_name", "", "Name of the software as ingested by Fleet")
softwareVersion := flag.String("software_version", "", "Version of the software as ingested by Fleet")
softwareSource := flag.String("software_source", "", "Source for this software (e.g. 'apps' for macOS applications)")
softwareBundleIdentifier := flag.String("software_bundle_identifier", "", "Bundle identifier of the software as ingested by Fleet (for macOS apps only)")
flag.Parse()
// Name, version and source are mandatory; the bundle identifier is optional.
if *softwareName == "" {
fmt.Println("Must set -software_name flag.")
return
}
if *softwareVersion == "" {
fmt.Println("Must set -software_version flag.")
return
}
if *softwareSource == "" {
fmt.Println("Must set -software_source flag.")
return
}
if err := os.MkdirAll(*vulnDBDir, os.ModePerm); err != nil {
panic(err)
}
logger := log.NewJSONLogger(os.Stdout)
if *sync {
fmt.Printf("Syncing into %s...\n", *vulnDBDir)
if err := vulnDBSync(*vulnDBDir, logger); err != nil {
panic(err)
}
}
ctx := context.Background()
// The datastore is a mock that serves the single software item and keeps
// the matched CPEs in memory.
ds := new(mock.Store)
ds.AllSoftwareIteratorFunc = func(ctx context.Context, query fleet.SoftwareIterQueryOptions) (fleet.SoftwareIterator, error) {
return &softwareIterator{
software: []fleet.Software{
{
Name: *softwareName,
Version: *softwareVersion,
Source: *softwareSource,
BundleIdentifier: *softwareBundleIdentifier,
},
},
}, nil
}
var softwareCPEs []fleet.SoftwareCPE
ds.UpsertSoftwareCPEsFunc = func(ctx context.Context, cpes []fleet.SoftwareCPE) (int64, error) {
// Each call replaces the previously stored CPEs.
softwareCPEs = cpes
for _, cpe := range cpes {
fmt.Printf("Matched CPE: %s\n", cpe.CPE)
}
return int64(len(cpes)), nil
}
ds.ListSoftwareCPEsFunc = func(ctx context.Context) ([]fleet.SoftwareCPE, error) {
return softwareCPEs, nil
}
ds.InsertSoftwareVulnerabilityFunc = func(ctx context.Context, vuln fleet.SoftwareVulnerability, source fleet.VulnerabilitySource) (bool, error) {
return true, nil
}
ds.DeleteOutOfDateVulnerabilitiesFunc = func(ctx context.Context, source fleet.VulnerabilitySource, duration time.Duration) error {
return nil
}
fmt.Println("Translating software to CPE...")
err := nvd.TranslateSoftwareToCPE(ctx, ds, *vulnDBDir, logger)
if err != nil {
panic(err)
}
// Without a CPE there is nothing to translate to CVEs.
if len(softwareCPEs) == 0 {
fmt.Println("Unable to match a CPE for the software...")
return
}
fmt.Println("Translating CPEs to CVEs...")
vulns, err := nvd.TranslateCPEToCVE(ctx, ds, *vulnDBDir, logger, true, 1*time.Hour)
if err != nil {
panic(err)
}
var cves []string
for _, vuln := range vulns {
cves = append(cves, vuln.CVE)
}
fmt.Printf("CVEs found for %s (%s): %s\n", *softwareName, *softwareVersion, strings.Join(cves, ", "))
}
// softwareIterator iterates over an in-memory list of software.
// main returns it from the mocked AllSoftwareIteratorFunc as a fleet.SoftwareIterator.
type softwareIterator struct {
// software is the list of items to iterate over.
software []fleet.Software
// i is the index of the item that the next call to Value returns.
i int
}
// Next reports whether there is an item left to be returned by Value.
// It does not advance the iterator.
func (s *softwareIterator) Next() bool {
	return s.i < len(s.software)
}
// Value returns a pointer to the current item and advances the iterator to the next one.
// The returned error is always nil. It must only be called after Next returned true,
// otherwise the slice access panics.
func (s *softwareIterator) Value() (*fleet.Software, error) {
	current := &s.software[s.i]
	s.i++
	return current, nil
}
// Err always returns nil; this iterator never reports an error.
func (s *softwareIterator) Err() error {
return nil
}
// Close does nothing and always returns nil.
func (s *softwareIterator) Close() error {
return nil
}
// vulnDBSync runs nvd.Sync with vulnDBDir as the vulnerability path and returns the
// error reported by nvd.Sync (nil on success).
func vulnDBSync(vulnDBDir string, logger log.Logger) error {
	return nvd.Sync(nvd.SyncOptions{VulnPath: vulnDBDir}, logger)
}