fleet/cmd/cpe/generate.go
Victor Lyuboslavsky eada583ff1
Updating CPE generator to use new NVD API. (#15018)
Loom explaining changes (hit 5 min limit):
https://www.loom.com/share/e59b63bf638e4d9cad7984ef589b878d?sid=111fff75-115a-4a44-ae4f-6f25fede0d51

#14887

- [x] Need to merge fleetdm/nvd PR
https://github.com/fleetdm/nvd/pull/25 before this one.

# Checklist for submitter

- [x] Added/updated tests
- [x] Manual QA for all new/changed functionality
- Manually tested (with corresponding fleetdm/fleet changes) in my
personal fork: https://github.com/getvictor/nvd/releases

# QA Plan (must be done before merging this PR, and after merging the
nvd PR)
- [ ] Fork https://github.com/fleetdm/nvd and point `generate.yml` to
this branch.
[example](9d8e54930b/.github/workflows/generate.yml (L26))
- [ ] Add NVD_API_KEY to nvd secrets, and run the nvd generate
GitHub action. Get key:
https://nvd.nist.gov/developers/request-an-api-key
- [ ] Compare the generated `cpe-###.sqlite.gz` to the previous one. One
way is to open it up with sqlite3 and `select * from cpe_2 order by
cpe23;` and dump results to a CSV file. Known differences are:
   - New file has ~2,500 more records
- Backslashes are handled differently for `Backpack\CRUD` and `Philips
In.Sight B120\37` products -- not a new issue since we do not support
those products right now
- `cpe:2.3:a:moodle:moodle:4.2.0:*:*:*:*:*:*:*` -- this appears OK.
Also, it is a PHP plugin, and we don't support these currently.
- [ ] Record the existing vulnerabilities of current hosts.
- [ ] Stop any running fleet server. Delete `/tmp/vulndbs/cpe.sqlite`.
Can also delete other files there, or not delete this file -- it should
be overwritten by the new file. Also delete all rows in software_cpe and
software_cve DB tables. (Or can just spin up a fresh fleet server with
fresh DB, and re-enroll hosts (after setting the new env variable
below))
- [ ] Find the path to the generated `cpe-###.sqlite.gz` file
- [ ] Set `FLEET_VULNERABILITIES_CPE_DATABASE_URL` environment variable
to the above path, and start fleet server.
- [ ] After server's vulnerabilities cron job runs, the new
vulnerabilities should match the previous vulnerabilities
2023-11-20 16:10:00 -06:00

231 lines
5.7 KiB
Go

package main
import (
"compress/gzip"
"crypto/sha256"
"fmt"
"github.com/facebookincubator/nvdtools/cpedict"
"github.com/facebookincubator/nvdtools/wfn"
"github.com/fleetdm/fleet/v4/pkg/fleethttp"
"github.com/fleetdm/fleet/v4/server/ptr"
"github.com/fleetdm/fleet/v4/server/vulnerabilities/nvd"
"github.com/pandatix/nvdapi/common"
"github.com/pandatix/nvdapi/v2"
"io"
"log"
"log/slog"
"os"
"path/filepath"
"strings"
"time"
)
// Name of the environment variable the NVD API key is read from.
const apiKeyEnvVar = "NVD_API_KEY" //nolint:gosec

// Timing and retry settings used when talking to the NVD API.
const (
	// maxRetryAttempts is the bound the retry loop in getCPEs compares its
	// counter of consecutive failed requests against.
	maxRetryAttempts = 10

	// httpClientTimeout is the timeout given to the HTTP client created in main.
	httpClientTimeout = 2 * time.Minute

	// waitTimeForRetry is how long to sleep before re-issuing a failed request.
	waitTimeForRetry = 30 * time.Second

	// waitTimeBetweenRequests is how long to sleep between two successful page requests.
	waitTimeBetweenRequests = 6 * time.Second
)
// panicIf panics with err when err is non-nil and does nothing otherwise.
func panicIf(err error) {
	if err == nil {
		return
	}
	panic(err)
}
// main builds the CPE sqlite database from the NVD API in the current working
// directory, gzips it, and renames the archive so that its SHA256 checksum is
// part of the file name. The NVD API key is read from the environment variable
// named by apiKeyEnvVar; the process exits when it is not set.
func main() {
	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelInfo})))

	apiKey := os.Getenv(apiKeyEnvVar)
	if apiKey == "" {
		log.Fatalf("Must set %v environment variable", apiKeyEnvVar)
	}

	workDir, err := os.Getwd()
	panicIf(err)
	slog.Info(fmt.Sprintf("CWD: %v", workDir))

	// Fetch every CPE and write the uncompressed database.
	httpClient := fleethttp.NewClient(fleethttp.WithTimeout(httpClientTimeout))
	sqlitePath := getCPEs(httpClient, apiKey, workDir)
	slog.Info(fmt.Sprintf("Sqlite file %s size: %.2f MB\n", sqlitePath, getSizeMB(sqlitePath)))

	slog.Info("Compressing DB...")
	archivePath, err := compress(sqlitePath)
	panicIf(err)

	slog.Info("Calculating SHA256...")
	archivePath, err = addSHA256(archivePath)
	panicIf(err)

	slog.Info(fmt.Sprintf("Final compressed file %s size: %.2f MB\n", archivePath, getSizeMB(archivePath)))
	slog.Info("Done.")
}
// getSizeMB reports the size of the file at path in units of 1024*1024 bytes.
// It panics when the file cannot be stat'ed.
func getSizeMB(path string) float64 {
	const unit = 1024.0

	fileInfo, statErr := os.Stat(path)
	panicIf(statErr)

	sizeBytes := float64(fileInfo.Size())
	return sizeBytes / unit / unit
}
// getCPEs downloads the CPE dictionary from the NVD API page by page, converts
// every returned product to a cpedict.CPEItem and stores the items in a sqlite
// database named cpe.sqlite inside resultPath. It returns the path of that
// database.
//
// A failed request is retried after waitTimeForRetry, at most maxRetryAttempts
// times in a row; the next consecutive failure panics with the request error.
// The process exits via log.Fatalf when a page reports no results while more
// are expected, or when the number of collected CPEs does not match the total
// reported by the API. Failures creating the NVD client or generating the
// database panic.
func getCPEs(client common.HTTPClient, apiKey string, resultPath string) string {
	slog.Info("Fetching CPEs from NVD...")
	nvdClient, err := nvdapi.NewNVDClient(client, apiKey)
	panicIf(err)

	var cpes []cpedict.CPEItem
	retryAttempts := 0
	// Seeded with 1 so the first request is issued; overwritten with the total
	// reported by each successful response.
	totalResults := 1
	for startIndex := 0; startIndex < totalResults; {
		cpeResponse, err := nvdapi.GetCPEs(nvdClient, nvdapi.GetCPEsParams{StartIndex: ptr.Int(startIndex)})
		if err != nil {
			// Give up once maxRetryAttempts consecutive retries have been spent.
			if retryAttempts >= maxRetryAttempts {
				panic(err)
			}
			slog.Warn(fmt.Sprintf("NVD request returned error:'%v' Retrying in %v", err.Error(), waitTimeForRetry.String()))
			retryAttempts++
			time.Sleep(waitTimeForRetry)
			continue
		}
		retryAttempts = 0
		totalResults = cpeResponse.TotalResults
		slog.Info(fmt.Sprintf("Got %v results", cpeResponse.ResultsPerPage))

		// startIndex only advances by ResultsPerPage; without this guard a page
		// reporting no results while more are expected would loop forever.
		if cpeResponse.ResultsPerPage <= 0 && startIndex < totalResults {
			log.Fatalf("NVD returned no results at index %v out of %v", startIndex, totalResults)
		}

		startIndex += cpeResponse.ResultsPerPage
		for _, product := range cpeResponse.Products {
			cpes = append(cpes, convertToCPEItem(product.CPE))
		}
		if startIndex < totalResults {
			// NVD API recommendation to sleep between requests: https://nvd.nist.gov/developers/api-workflows
			time.Sleep(waitTimeBetweenRequests)
			slog.Info(fmt.Sprintf("Fetching index %v out of %v", startIndex, totalResults))
		}
	}

	// Sanity check
	if totalResults <= 1 || len(cpes) != totalResults {
		log.Fatalf("Invalid number of expected results:%v or actual results:%v", totalResults, len(cpes))
	}

	slog.Info("Generating CPE sqlite DB...")
	dbPath := filepath.Join(resultPath, "cpe.sqlite")
	err = nvd.GenerateCPEDB(dbPath, cpes)
	panicIf(err)
	return dbPath
}
// convertToCPEItem converts a CPE record returned by the NVD API into a
// cpedict.CPEItem. It copies the parsed CPE 2.3 name, the deprecation flag with
// the names of the deprecating CPEs, and the first English title (stored under
// the "en-US" key).
//
// It panics when the CPE name or the name of a deprecating CPE cannot be
// parsed, or when a deprecating entry carries no CPE name at all.
func convertToCPEItem(in nvdapi.CPE) (out cpedict.CPEItem) {
	out = cpedict.CPEItem{}

	// CPE name
	wfName, err := wfn.Parse(in.CPEName)
	if err != nil {
		panic(fmt.Errorf("parsing CPE name %q: %w", in.CPEName, err))
	}
	out.CPE23 = cpedict.CPE23Item{
		Name: cpedict.NamePattern(*wfName),
	}

	// Deprecations
	out.Deprecated = in.Deprecated
	if in.Deprecated {
		out.CPE23.Deprecation = &cpedict.Deprecation{}
		for _, item := range in.DeprecatedBy {
			// CPEName is a pointer; fail with a message that names the offending
			// CPE instead of an anonymous nil-pointer dereference.
			if item.CPEName == nil {
				panic(fmt.Errorf("CPE %q is deprecated by an entry without a CPE name", in.CPEName))
			}
			deprecatorName, err := wfn.Parse(*item.CPEName)
			if err != nil {
				panic(fmt.Errorf("parsing CPE name %q deprecating %q: %w", *item.CPEName, in.CPEName, err))
			}
			deprecatorInfo := cpedict.DeprecatedInfo{
				Name: cpedict.NamePattern(*deprecatorName),
			}
			out.CPE23.Deprecation.DeprecatedBy = append(out.CPE23.Deprecation.DeprecatedBy, deprecatorInfo)
		}
	}

	// Title
	out.Title = cpedict.TextType{}
	for _, title := range in.Titles {
		// only using English language
		if title.Lang == "en" {
			out.Title["en-US"] = title.Title
			break
		}
	}

	// The following fields are not needed by subsequent code:
	// out.DeprecatedBy
	// out.DeprecationDate
	// out.Notes
	// out.References
	return out
}
// compress gzips the file at path into a new file named path + ".gz" and
// returns the path of that archive.
//
// The input is opened before the output is created, so a missing input does
// not leave an empty archive behind. Errors from closing the gzip writer or the
// output file are returned rather than only logged: either of them means the
// archive may be incomplete. On error the returned path is empty.
func compress(path string) (compressedPath string, err error) {
	src, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer func() {
		// The input is only read, so a failed close cannot corrupt anything;
		// logging is enough.
		if cerr := src.Close(); cerr != nil {
			slog.Error(fmt.Sprintf("Could not close file %v: %v", src.Name(), cerr.Error()))
		}
	}()

	gzPath := fmt.Sprintf("%s.gz", path)
	dst, err := os.Create(gzPath)
	if err != nil {
		return "", err
	}
	defer func() {
		// Report a failed close of the archive unless an earlier error is
		// already being returned.
		if cerr := dst.Close(); cerr != nil && err == nil {
			compressedPath, err = "", fmt.Errorf("closing %s: %w", gzPath, cerr)
		}
	}()

	w := gzip.NewWriter(dst)
	if _, err = io.Copy(w, src); err != nil {
		// Best effort only: the copy error is the one worth reporting.
		_ = w.Close()
		return "", err
	}
	// Close flushes the remaining compressed data and writes the gzip footer,
	// so its error decides whether the archive is valid.
	if err = w.Close(); err != nil {
		return "", fmt.Errorf("finishing gzip stream for %s: %w", gzPath, err)
	}
	return gzPath, nil
}
// addSHA256 adds the file's SHA256 checksum to its name
func addSHA256(path string) (string, error) {
file, err := os.Open(path)
if err != nil {
return "", err
}
defer closeFile(file)
hash := sha256.New()
if _, err := io.Copy(hash, file); err != nil {
return "", err
}
newPath, err := replaceLast(path, "cpe.sqlite.gz", fmt.Sprintf("cpe-%x.sqlite.gz", hash.Sum(nil)))
if err != nil {
return "", err
}
err = os.Rename(path, newPath)
return newPath, err
}
// replaceLast returns s with the last occurrence of old replaced by new.
// It returns an error when old does not occur in s.
func replaceLast(s, old, new string) (string, error) {
	idx := strings.LastIndex(s, old)
	if idx < 0 {
		return "", fmt.Errorf("substring:%v not found in string:%v", old, s)
	}
	head, tail := s[:idx], s[idx+len(old):]
	return head + new + tail, nil
}
// closeFile closes file and logs an error through slog when the close fails.
// The failure is not reported to the caller.
func closeFile(file *os.File) {
	if closeErr := file.Close(); closeErr != nil {
		slog.Error(fmt.Sprintf("Could not close file %v: %v", file.Name(), closeErr.Error()))
	}
}