hashicorp / terraform-provider-azurerm

Terraform provider for Azure Resource Manager

Home Page:https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Support for Azure Monitor Data Collection Rules

ricohomewood opened this issue · comments

Community Note

  • Please vote on this issue by adding a 👍 reaction to the original issue to help the community and maintainers prioritize this request
  • Please do not leave "+1" or "me too" comments, they generate extra noise for issue followers and do not help prioritize the request
  • If you are interested in working on this issue or have submitted a pull request, please leave a comment

Description

With the new Azure Monitor Guest Health agent extension, there is a condition that the enabling of this agent creates a required Data Collection Rule in Azure Monitor for VMInsights for this to work.

Please can we support the Microsoft.Insights/dataCollectionRules namespace as per the Azure API: https://docs.microsoft.com/en-us/rest/api/monitor/datacollectionrules

New or Affected Resource(s)

  • azurerm_monitor_data_collection_rule

Potential Terraform Configuration

resource "azurerm_monitor_data_collection_rule" "example" {
name                            = dataCollectionRuleName
resource_group_name             = resourceGroupName
location                        = location

settings = <<SETTINGS
  {
  "properties": {
    "dataSources": {
      "performanceCounters": [
        {
          "name": "cloudTeamCoreCounters",
          "streams": [
            "Microsoft-Perf"
          ],
          "scheduledTransferPeriod": "PT1M",
          "samplingFrequencyInSeconds": 15,
          "counterSpecifiers": [
            "\\Processor(_Total)\\% Processor Time",
            "\\Memory\\Committed Bytes",
            "\\LogicalDisk(_Total)\\Free Megabytes",
            "\\PhysicalDisk(_Total)\\Avg. Disk Queue Length"
          ]
        },
        {
          "name": "appTeamExtraCounters",
          "streams": [
            "Microsoft-Perf"
          ],
          "scheduledTransferPeriod": "PT5M",
          "samplingFrequencyInSeconds": 30,
          "counterSpecifiers": [
            "\\Process(_Total)\\Thread Count"
          ]
        }
      ],
      "windowsEventLogs": [
        {
          "name": "cloudSecurityTeamEvents",
          "streams": [
            "Microsoft-WindowsEvent"
          ],
          "scheduledTransferPeriod": "PT1M",
          "xPathQueries": [
            "Security!*"
          ]
        },
        {
          "name": "appTeam1AppEvents",
          "streams": [
            "Microsoft-WindowsEvent"
          ],
          "scheduledTransferPeriod": "PT5M",
          "xPathQueries": [
            "System!*[System[(Level = 1 or Level = 2 or Level = 3)]]",
            "Application!*[System[(Level = 1 or Level = 2 or Level = 3)]]"
          ]
        }
      ],
      "syslog": [
        {
          "name": "cronSyslog",
          "streams": [
            "Microsoft-Syslog"
          ],
          "facilityNames": [
            "cron"
          ],
          "logLevels": [
            "Debug",
            "Critical",
            "Emergency"
          ]
        },
        {
          "name": "syslogBase",
          "streams": [
            "Microsoft-Syslog"
          ],
          "facilityNames": [
            "syslog"
          ],
          "logLevels": [
            "Alert",
            "Critical",
            "Emergency"
          ]
        }
      ]
    },
    "destinations": {
      "logAnalytics": [
        {
          "workspaceResourceId": "/subscriptions/<subscription_id>/resourceGroups/<resource_group>/providers/Microsoft.OperationalInsights/workspaces/centralTeamWorkspace",
          "name": "centralWorkspace"
        }
      ]
    },
    "dataFlows": [
      {
        "streams": [
          "Microsoft-Perf",
          "Microsoft-Syslog",
          "Microsoft-WindowsEvent"
        ],
        "destinations": [
          "centralWorkspace"
        ]
      }
    ]
  }
  }
SETTINGS
}

References

commented

Here's a CLI-based implementation to use while waiting for the resource:

# Module inputs.
# client_name, environment, stack and location_short are combined into the
# generated Data Collection Rule name (see local.name).

variable "client_name" {
  description = "Client name"
  type        = string
}

# Also used as the value of the default "env" tag.
variable "environment" {
  description = "Environment name"
  type        = string
}

# Also used as the value of the default "stack" tag.
variable "stack" {
  description = "Stack name"
  type        = string
}

# Forms part of the rule's resource ID (see local.data_collection_rule_id).
variable "resource_group_name" {
  description = "Resource Group the resources will belong to"
  type        = string
}

# Passed to the JSON template as the rule's "location".
variable "location" {
  description = "Azure location."
  type        = string
}

variable "location_short" {
  description = "Short string for Azure location."
  type        = string
}

# Optional; when non-empty it is prepended to the generated name
# (see local.name_prefix for how the separator is added).
variable "name_prefix" {
  description = "Name prefix for all resources generated name"
  type        = string
  default     = ""
}

# Merged with local.default_tags before being rendered into the template.
variable "extra_tags" {
  description = "Extra tags to add"
  type        = map(string)
  default     = {}
}

# Rendered into the template as the logAnalytics destination's workspaceResourceId.
variable "log_analytics_workspace_id" {
  description = "Log Analytics Workspace ID where the metrics are sent"
  type        = string
}

# JSON-encoded and rendered into the template as the syslog data source's "facilityNames".
variable "syslog_facilities_names" {
  description = "List of syslog to retrieve in Data Collection Rule"
  type        = list(string)
  default = ["auth", "authpriv", "cron", "daemon", "mark", "kern", "local0", "local1", "local2", "local3", "local4",
  "local5", "local6", "local7", "lpr", "mail", "news", "syslog", "user", "UUCP"]
}

# JSON-encoded and rendered into the template as the syslog data source's "logLevels".
variable "syslog_levels" {
  description = "List of syslog levels to retrieve in Data Collection Rule"
  type        = list(string)
  default     = ["Error", "Critical", "Alert", "Emergency"]
}

locals {
  # Subscription of the credentials the azurerm provider is currently using.
  subscription_id = data.azurerm_client_config.current.subscription_id

  # Tags always set on the rule; merged with var.extra_tags when the template is rendered.
  default_tags = {
    stack = var.stack
    env   = var.environment
  }

  # An empty prefix stays empty. Otherwise a "-" separator is appended when the
  # prefix ends in a lowercase letter or digit.
  name_prefix = var.name_prefix == "" ? "" : replace(var.name_prefix, "/[a-z0-9]$/", "$0-")

  # <prefix><stack>-<client>-<short location>-<environment>-dcr
  name = format(
    "%s%s-%s-%s-%s-dcr",
    local.name_prefix,
    var.stack,
    var.client_name,
    var.location_short,
    var.environment,
  )

  # Full ARM resource ID of the Data Collection Rule, used to build the REST URL.
  data_collection_rule_id = format(
    "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Insights/dataCollectionRules/%s",
    local.subscription_id,
    var.resource_group_name,
    local.name,
  )
}

# Exposes the subscription ID of the current provider credentials (read by local.subscription_id).
data "azurerm_client_config" "current" {}

# Renders the Data Collection Rule request body from the JSON template shipped with the module.
# NOTE(review): the `template_file` data source comes from the separate `template` provider;
# the built-in `templatefile()` function may be a drop-in replacement — confirm before switching.
data "template_file" "data_collection_rule" {
  template = file(format("%s/files/data-collection-rule.json.tpl", path.module))

  vars = {
    location                   = var.location
    log_analytics_workspace_id = var.log_analytics_workspace_id
    # Lists and maps are JSON-encoded here because the template inserts them verbatim (unquoted).
    syslog_facility_names      = jsonencode(var.syslog_facilities_names)
    syslog_levels              = jsonencode(var.syslog_levels)
    tags                       = jsonencode(merge(local.default_tags, var.extra_tags))
  }
}

# Creates or updates the Data Collection Rule through the ARM REST API using the Azure CLI,
# as a stop-gap until a native resource exists.
resource "null_resource" "data_collection_rule" {
  # Re-run the provisioner whenever the rendered request body changes.
  triggers = {
    data = md5(data.template_file.data_collection_rule.rendered)
  }

  provisioner "local-exec" {
    # Values are passed through environment variables and expanded inside double quotes,
    # rather than being spliced into the command text. This keeps the "?" in the URL away
    # from shell globbing and stops a single quote in the rendered JSON (e.g. in a tag
    # value) from terminating the --body argument.
    command = <<EOC
      az rest --subscription "$DCR_SUBSCRIPTION_ID" \
              --method PUT \
              --url "$DCR_URL" \
              --body "$DCR_BODY"
EOC

    environment = {
      DCR_SUBSCRIPTION_ID = local.subscription_id
      DCR_URL             = "https://management.azure.com${local.data_collection_rule_id}?api-version=2019-11-01-preview"
      DCR_BODY            = data.template_file.data_collection_rule.rendered
    }
  }
}

And the template file

{
  "location": "${location}",
  "tags": ${tags},
  "properties": {
    "dataFlows": [
      {
        "destinations": [
          "azureMonitorMetrics-default"
        ],
        "streams": [
          "Microsoft-InsightsMetrics"
        ]
      },
      {
        "destinations": [
          "centralWorkspace"
        ],
        "streams": [
          "Microsoft-Perf",
          "Microsoft-Event",
          "Microsoft-Syslog"
        ]
      }
    ],
    "dataSources": {
      "performanceCounters": [
        {
          "counterSpecifiers": [
            "\\Processor Information(_Total)\\% Processor Time",
            "\\Processor Information(_Total)\\% Privileged Time",
            "\\Processor Information(_Total)\\% User Time",
            "\\Processor Information(_Total)\\Processor Frequency",
            "\\System\\Processes",
            "\\Process(_Total)\\Thread Count",
            "\\Process(_Total)\\Handle Count",
            "\\System\\System Up Time",
            "\\System\\Context Switches/sec",
            "\\System\\Processor Queue Length",
            "\\Memory\\% Committed Bytes In Use",
            "\\Memory\\Available Bytes",
            "\\Memory\\Committed Bytes",
            "\\Memory\\Cache Bytes",
            "\\Memory\\Pool Paged Bytes",
            "\\Memory\\Pool Nonpaged Bytes",
            "\\Memory\\Pages/sec",
            "\\Memory\\Page Faults/sec",
            "\\Process(_Total)\\Working Set",
            "\\Process(_Total)\\Working Set - Private",
            "\\LogicalDisk(_Total)\\% Disk Time",
            "\\LogicalDisk(_Total)\\% Disk Read Time",
            "\\LogicalDisk(_Total)\\% Disk Write Time",
            "\\LogicalDisk(_Total)\\% Idle Time",
            "\\LogicalDisk(_Total)\\Disk Bytes/sec",
            "\\LogicalDisk(_Total)\\Disk Read Bytes/sec",
            "\\LogicalDisk(_Total)\\Disk Write Bytes/sec",
            "\\LogicalDisk(_Total)\\Disk Transfers/sec",
            "\\LogicalDisk(_Total)\\Disk Reads/sec",
            "\\LogicalDisk(_Total)\\Disk Writes/sec",
            "\\LogicalDisk(_Total)\\Avg. Disk sec/Transfer",
            "\\LogicalDisk(_Total)\\Avg. Disk sec/Read",
            "\\LogicalDisk(_Total)\\Avg. Disk sec/Write",
            "\\LogicalDisk(_Total)\\Avg. Disk Queue Length",
            "\\LogicalDisk(_Total)\\Avg. Disk Read Queue Length",
            "\\LogicalDisk(_Total)\\Avg. Disk Write Queue Length",
            "\\LogicalDisk(_Total)\\% Free Space",
            "\\LogicalDisk(_Total)\\Free Megabytes",
            "\\Network Interface(*)\\Bytes Total/sec",
            "\\Network Interface(*)\\Bytes Sent/sec",
            "\\Network Interface(*)\\Bytes Received/sec",
            "\\Network Interface(*)\\Packets/sec",
            "\\Network Interface(*)\\Packets Sent/sec",
            "\\Network Interface(*)\\Packets Received/sec",
            "\\Network Interface(*)\\Packets Outbound Errors",
            "\\Network Interface(*)\\Packets Received Errors"
          ],
          "name": "perfCounterDataSource",
          "samplingFrequencyInSeconds": 30,
          "scheduledTransferPeriod": "PT1M",
          "streams": [
            "Microsoft-Perf",
            "Microsoft-InsightsMetrics"
          ]
        }
      ],
      "syslog": [
        {
          "facilityNames": ${syslog_facility_names},
          "logLevels": ${syslog_levels},
          "name": "sysLogsDataSource",
          "streams": [
            "Microsoft-Syslog"
          ]
        }
      ],
      "windowsEventLogs": [
        {
          "name": "eventLogsDataSource",
          "scheduledTransferPeriod": "PT5M",
          "streams": [
            "Microsoft-Event"
          ],
          "xPathQueries": [
            "Application!*[System[(Level=1 or Level=2)]]",
            "System!*[System[(Level=1 or Level=2)]]"
          ]
        }
      ]
    },
    "destinations": {
      "azureMonitorMetrics": {
        "name": "azureMonitorMetrics-default"
      },
      "logAnalytics": [
        {
          "name": "centralWorkspace",
          "workspaceResourceId": "${log_analytics_workspace_id}"
        }
      ]
    }
  }
}

Has there been any progress with this?

Giving this a bump. The Azure Monitoring Agent (AMA) is due to go GA very soon and switching to the AMA and Data Collection Rules (DCRs) would fix so many issues.

AMA should reach feature parity against the older agents (e.g. MMA, Dependency agent) so switching to this will address long standing gaps in Terraform for Agent configuration such as issue #3182 and the matching REST API issues.

Thanks to @BzSpi for providing the null provider REST API workaround. I'll be stealing that in the meantime!

AMAs and DCRs are Generally Available (GA) now. Are there any updates as far as Terraform support for these?

Would be great to get an update for when creation of Data Collection Rules (DCRs) will be available in terraform

@katbyte Note that Microsoft has put the Log Analytics Agent on an EOL path and started a marketing campaign to migrate users to the new Azure Monitor Agent, with a migration plan that includes creating the Data Collection Rules. As Microsoft is now urging customers to migrate, can this issue get some focus so that we can migrate?

These are the docs Microsoft is pushing to people in order to migrate: https://docs.microsoft.com/en-us/azure/azure-monitor/agents/azure-monitor-agent-migration

Are there any updates on this?

Any update? Would be great to have this.

Update? I'm in the process of planning the move to Azure Monitor agent and could really do with this. Currently testing using the null resource, thanks to @BzSpi

commented

The code I've put above has been integrated into a module, which will be updated as soon as the resource is available, if this helps.
https://registry.terraform.io/modules/claranet/run-iaas/azurerm/latest/submodules/vm-monitoring

This is a submodule of a more global module that aims to deploy everything necessary for VM run in production.

Bump, please can this issue get some focus so that we can migrate?

It's a pity this is taking so long to be supported. For now using Powershell scripting but the REST version of the Claranet PCP is a good alternative

Having support for Data Collection Rules and Data Collection Rules Associations for VMs would be greatly appreciated for migrating to Azure Monitor

Came to this link after searching for DCR support in TF. What's the recommended workaround until then?

Hi @nitish81. You could use the new azapi resource provider as a workaround until the resource type is supported in the azurerm provider.

Here is a working config I pulled together earlier in https://github.com/richeney/azapi.

Here is the main.tf:

# Workaround configuration: manages a Data Collection Rule and its VM association
# through the azapi provider while the azurerm provider lacks a native resource.
terraform {
  required_providers {
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "~>3.1"
    }

    azapi = {
      source  = "azure/azapi"
      version = "~>0.1"
    }
  }
}

provider "azurerm" {
  features {}
}

provider "azapi" {}

resource "azurerm_resource_group" "example" {
  name     = var.resource_group_name
  location = var.location
}

# Log Analytics workspace whose ID is injected into the DCR template below.
resource "azurerm_log_analytics_workspace" "central_workspace" {
  name                = "centralWorkspace"
  location            = azurerm_resource_group.example.location
  resource_group_name = azurerm_resource_group.example.name
  sku                 = "PerGB2018"
  retention_in_days   = 30
}

# The Data Collection Rule itself; the request body is rendered from
# example.dcr.json.tftpl with the workspace ID substituted in.
resource "azapi_resource" "example_dcr" {
  name      = "example"
  parent_id = azurerm_resource_group.example.id
  type      = "Microsoft.Insights/dataCollectionRules@2021-04-01"
  location  = var.location
  body = templatefile("example.dcr.json.tftpl", {
    "workspace_id" : azurerm_log_analytics_workspace.central_workspace.id
  })
}

# Associates the rule with a virtual machine (the association's parent is the VM).
# NOTE(review): azurerm_linux_virtual_machine.example is not defined in this snippet —
# presumably declared elsewhere in the linked repository; confirm.
resource "azapi_resource" "example_dcr_association" {
  name      = "example"
  parent_id = azurerm_linux_virtual_machine.example.id
  type      = "Microsoft.Insights/dataCollectionRuleAssociations@2021-04-01"
  body      = jsonencode({
    properties = {
      dataCollectionRuleId = azapi_resource.example_dcr.id
    }
  })
}

# Resource IDs exported for use by callers.
output "workspace_id" {
  value = azurerm_log_analytics_workspace.central_workspace.id
}

output "dcr_id" {
  value = azapi_resource.example_dcr.id
}

output "dcr_association_id" {
  value = azapi_resource.example_dcr_association.id
}

This functionality has been released in v3.15.0 of the Terraform Provider. Please see the Terraform documentation on provider versioning or reach out if you need any assistance upgrading.

For further feature requests or bug reports with this functionality, please create a new GitHub issue following the template. Thank you!

I'm going to lock this issue because it has been closed for 30 days ⏳. This helps our maintainers find and focus on the active issues.
If you have found a problem that seems similar to this, please open a new issue and complete the issue template so we can capture all the details necessary to investigate further.