Website: Send custom metrics from anonymous usage statistics to Datadog (#11362)

. 
Closes: #10848 
Changes:
- Updated the `receive-usage-analytics` webhook to send custom metrics
to our Datadog account, where we can create graphs and dashboards to
track Fleet feature adoption, Fleet/osquery/orbit versions in use,
reported host counts, and stored errors.
- Added a new config variable: `sails.config.custom.datadogApiKey`
This commit is contained in:
Eric 2023-04-27 16:45:35 -05:00 committed by GitHub
parent 3496011e35
commit fe07c39f22
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 257 additions and 0 deletions

View File

@ -39,8 +39,264 @@ module.exports = {
fn: async function (inputs) {
// Create a database record for these usage statistics
await HistoricalUsageSnapshot.create(inputs);
if(!sails.config.custom.datadogApiKey) {
throw new Error('No Datadog API key configured! (Please set sails.config.custom.datadogApiKey)');
}
// Store strings and booleans as tags.
let baseMetricTags = [
`fleet_version:${inputs.fleetVersion}`,
`license_tier:${inputs.licenseTier}`,
`software_inventory_enabled:${inputs.softwareInventoryEnabled}`,
`vuln_detection_enabled:${inputs.vulnDetectionEnabled}`,
`system_users_enabled:${inputs.systemUsersEnabled}`,
`host_status_webhook_enabled:${inputs.hostStatusWebhookEnabled}`,
];
// Create a timestamp in seconds for these metrics
let metricsTimestampInSeconds = Math.floor(Date.now() / 1000);
// Build metrics for the usagle statistics that are numbers
let metricsToSendToDatadog = [
{
metric: 'usage_statistics.fleet_server_stats',
type: 3,
points: [{
timestamp: metricsTimestampInSeconds,
value: 1
}],
resources: [{
name: inputs.anonymousIdentifier,
type: 'fleet_instance'
}],
tags: baseMetricTags,
},
{
metric: 'usage_statistics.num_hosts_enrolled',
type: 3,
points: [{
timestamp: metricsTimestampInSeconds,
value: inputs.numHostsEnrolled
}],
resources: [{
name: inputs.anonymousIdentifier,
type: 'fleet_instance'
}],
tags: baseMetricTags,
},
{
metric: 'usage_statistics.num_users',
type: 3,
points: [{
timestamp: metricsTimestampInSeconds,
value: inputs.numUsers
}],
resources: [{
name: inputs.anonymousIdentifier,
type: 'fleet_instance'
}],
tags: baseMetricTags,
},
{
metric: 'usage_statistics.num_teams',
type: 3,
points: [{
timestamp: metricsTimestampInSeconds,
value: inputs.numTeams
}],
resources: [{
name: inputs.anonymousIdentifier,
type: 'fleet_instance'
}],
tags: baseMetricTags,
},
{
metric: 'usage_statistics.num_policies',
type: 3,
points: [{
timestamp: metricsTimestampInSeconds,
value: inputs.numPolicies
}],
resources: [{
name: inputs.anonymousIdentifier,
type: 'fleet_instance'
}],
tags: baseMetricTags,
},
{
metric: 'usage_statistics.num_labels',
type: 3,
points: [{
timestamp: metricsTimestampInSeconds,
value: inputs.numLabels
}],
resources: [{
name: inputs.anonymousIdentifier,
type: 'fleet_instance'
}],
tags: baseMetricTags,
},
{
metric: 'usage_statistics.num_weekly_active_users',
type: 3,
points: [{
timestamp: metricsTimestampInSeconds,
value: inputs.numWeeklyActiveUsers
}],
resources: [{
name: inputs.anonymousIdentifier,
type: 'fleet_instance'
}],
tags: baseMetricTags,
},
{
metric: 'usage_statistics.num_weekly_policy_violation_days_actual',
type: 3,
points: [{
timestamp: metricsTimestampInSeconds,
value: inputs.numWeeklyPolicyViolationDaysActual
}],
resources: [{
name: inputs.anonymousIdentifier,
type: 'fleet_instance'
}],
tags: baseMetricTags,
},
{
metric: 'usage_statistics.num_weekly_policy_violation_days_possible',
type: 3,
points: [{
timestamp: metricsTimestampInSeconds,
value: inputs.numWeeklyPolicyViolationDaysPossible
}],
resources: [{
name: inputs.anonymousIdentifier,
type: 'fleet_instance'
}],
tags: baseMetricTags,
},
{
metric: 'usage_statistics.num_hosts_not_responding',
type: 3,
points: [{
timestamp: metricsTimestampInSeconds,
value: inputs.numHostsNotResponding
}],
resources: [{
name: inputs.anonymousIdentifier,
type: 'fleet_instance'
}],
tags: baseMetricTags,
},
];
// Build metrics for logged errors
if(inputs.storedErrors.length > 0) {
// If inputs.storedErrors is not an empty array, we'll iterate through it to build custom metric for each object in the array
for(let error of inputs.storedErrors) {
// Create a new array of tags for this error
let errorTags = _.clone(baseMetricTags);
let errorLocation = 1;
// Create a tag for each error location
for(let location of error.loc) { // iterate throught the location array of this error
// Add the error's location as a custom tag (SNAKE_CASED)
errorTags.push(`error_location_${errorLocation}:${location.replace(/\s/gi, '_')}`);
errorLocation++;
}
//
let metricToAdd = {
metric: 'usage_statistics.stored_errors',
type: 3,
points: [{timestamp: metricsTimestampInSeconds, value: error.count}],
resources: [{name: inputs.anonymousIdentifier, type: 'fleet_instance'}],
tags: errorTags,
};
// Add the custom metric to the array of metrics to send to Datadog.
metricsToSendToDatadog.push(metricToAdd);
}//∞
}//fi
// If inputs.hostsEnrolledByOrbitVersion is not an empty array, we'll iterate through it to build custom metric for each object in the array
if(inputs.hostsEnrolledByOrbitVersion.length > 0) {
for(let version of inputs.hostsEnrolledByOrbitVersion) {
let orbitVersionTags = _.clone(baseMetricTags);
orbitVersionTags.push(`orbit_version:${version.orbitVersion}`);
let metricToAdd = {
metric: 'usage_statistics.host_count_by_orbit_version',
type: 3,
points: [{timestamp: metricsTimestampInSeconds, value:version.numHosts}],
resources: [{name: inputs.anonymousIdentifier, type: 'fleet_instance'}],
tags: orbitVersionTags,
};
// Add the custom metric to the array of metrics to send to Datadog.
metricsToSendToDatadog.push(metricToAdd);
}//∞
}//fi
// If inputs.hostsEnrolledByOsqueryVersion is not an empty array, we'll iterate through it to build custom metric for each object in the array
if(inputs.hostsEnrolledByOsqueryVersion.length > 0) {
for(let version of inputs.hostsEnrolledByOsqueryVersion) {
let osqueryVersionTags = _.clone(baseMetricTags);
osqueryVersionTags.push(`osquery_version:${version.osqueryVersion}`);
let metricToAdd = {
metric: 'usage_statistics.host_count_by_osquery_version',
type: 3,
points: [{timestamp: metricsTimestampInSeconds, value:version.numHosts}],
resources: [{name: inputs.anonymousIdentifier, type: 'fleet_instance'}],
tags: osqueryVersionTags,
};
// Add the custom metric to the array of metrics to send to Datadog.
metricsToSendToDatadog.push(metricToAdd);
}//∞
}//fi
// If the hostByOperatingSystem is not an empty object, we'll iterate through the object to build metrics for each type of operating system.
// See https://fleetdm.com/docs/using-fleet/usage-statistics#what-is-included-in-usage-statistics-in-fleet to see an example of a hostByOperatingSystem send by Fleet instances.
if(_.keys(inputs.hostsEnrolledByOperatingSystem).length > 0) {
// Iterate through each array of objects
for(let operatingSystem in inputs.hostsEnrolledByOperatingSystem) {
// For every object in the array, we'll send a metric to track host count for each operating system version.
for(let osVersion of inputs.hostsEnrolledByOperatingSystem[operatingSystem]) {
// Only continue if the object in the array has a numEnrolled and version value.
if(osVersion.numEnrolled && osVersion.version) {
// Clone the baseMetricTags array, each metric will have the operating version name added as a `os_version_name` tag
let osInfoTags = _.clone(baseMetricTags);
osInfoTags.push(`os_version_name:${osVersion.version}`);
let metricToAdd = {
metric: 'usage_statistics.host_count_by_os_version',
type: 3,
points: [{timestamp: metricsTimestampInSeconds, value:osVersion.numEnrolled}],
resources: [{name: operatingSystem, type: 'os_type'}],
tags: osInfoTags,
};
// Add the custom metric to the array of metrics to send to Datadog.
metricsToSendToDatadog.push(metricToAdd);
}//fi
}//∞
}//∞
}//fi
await sails.helpers.http.post.with({
url: 'https://api.us5.datadoghq.com/api/v2/series',
data: {
series: metricsToSendToDatadog,
},
headers: {
'DD-API-KEY': sails.config.custom.datadogApiKey,
'Content-Type': 'application/json',
}
}).tolerate((err)=>{
// If there was an error sending metrics to Datadog, we'll log the error in a warning, but we won't throw an error.
// This way, we'll still return a 200 status to the Fleet instance that sent usage analytics.
sails.log.warn(`When the receive-usage-analytics webhook tried to send metrics to Datadog, an error occured. Raw error: ${require('util').inspect(err)}`);
});
}

View File

@ -186,6 +186,7 @@ module.exports.custom = {
// githubBotWebhookSecret: '…',
// slackWebhookUrlForGithubBot: '…',
// mergeFreezeAccessToken: '…',
// datadogApiKey: '…',
//…