Skip to content

Azure cni networking #135

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Oct 14, 2020
13 changes: 13 additions & 0 deletions azurerm/_modules/aks/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,19 @@ resource "azurerm_kubernetes_cluster" "current" {

vm_size = var.default_node_pool_vm_size
os_disk_size_gb = var.default_node_pool_os_disk_size_gb

vnet_subnet_id = var.network_plugin == "azure" ? azurerm_subnet.current[0].id : null
max_pods = var.max_pods
}

network_profile {
network_plugin = var.network_plugin
network_policy = var.network_policy

docker_bridge_cidr = "172.17.0.1/16"
service_cidr = var.service_cidr
dns_service_ip = var.dns_service_ip
pod_cidr = var.network_plugin == "azure" ? null : var.pod_cidr
}

service_principal {
Expand Down
3 changes: 3 additions & 0 deletions azurerm/_modules/aks/output.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
output "aks_vnet" {
value = length(azurerm_virtual_network.current) > 0 ? azurerm_virtual_network.current[0] : null
}
45 changes: 45 additions & 0 deletions azurerm/_modules/aks/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,57 @@ variable "resource_group" {
description = "Resource group to use for the cluster."
}

variable "vnet_address_space" {
type = list(string)
description = "Address space for the virtual network."
}

variable "subnet_address_prefixes" {
type = list(string)
description = "Address space for the cluster subnet."
}

variable "subnet_service_endpoints" {
type = list(string)
description = "List of service endpoints to expose on the subnet."
}

variable "network_plugin" {
type = string
description = "Network plugin to use. Set to 'azure' for CNI; 'kubenet' for Basic"
}

variable "network_policy" {
type = string
description = "Network policy to use. Set to 'azure' for CNI; 'calico' for Basic"
}

variable "dns_prefix" {
type = string
description = "DNS prefix of AKS cluster API server."
default = "api"
}

variable "service_cidr" {
type = string
description = "CIDR range for kubernetes service."
}

variable "dns_service_ip" {
type = string
description = "IP address for the kube-dns service. Must be within service_cidr."
}

variable "pod_cidr" {
type = string
description = "CIDR range for pods."
}

variable "max_pods" {
type = number
description = "Maximum number of pods to run per node, if using CNI for networking."
}

variable "default_node_pool_name" {
type = string
description = "Name of default node pool."
Expand Down
20 changes: 20 additions & 0 deletions azurerm/_modules/aks/vnet.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

resource "azurerm_virtual_network" "current" {
count = var.network_plugin == "azure" ? 1 : 0

name = "vnet-aks-${terraform.workspace}-cluster"
address_space = var.vnet_address_space
resource_group_name = data.azurerm_resource_group.current.name
location = data.azurerm_resource_group.current.location
}

resource "azurerm_subnet" "current" {
count = var.network_plugin == "azure" ? 1 : 0

name = "aks-node-subnet"
address_prefixes = var.subnet_address_prefixes
resource_group_name = data.azurerm_resource_group.current.name
virtual_network_name = azurerm_virtual_network.current[0].name

service_endpoints = var.subnet_service_endpoints == [""] ? null: var.subnet_service_endpoints
}
11 changes: 11 additions & 0 deletions azurerm/cluster/configuration.tf
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ locals {

dns_prefix = lookup(local.cfg, "dns_prefix", "api")

vnet_address_space = split(",", lookup(local.cfg, "vnet_address_space", "10.0.0.0/8"))
subnet_address_prefixes = split(",", lookup(local.cfg, "subnet_address_prefixes", "10.1.0.0/16"))
subnet_service_endpoints = split(",", lookup(local.cfg, "subnet_service_endpoints", ""))

network_plugin = lookup(local.cfg, "network_plugin", "kubenet")
network_policy = lookup(local.cfg, "network_policy", "calico")
service_cidr = lookup(local.cfg, "service_cidr", "10.0.0.0/16")
dns_service_ip = lookup(local.cfg, "dns_service_ip", "10.0.0.10")
pod_cidr = lookup(local.cfg, "pod_cidr", "10.244.0.0/16")
max_pods = lookup(local.cfg, "max_pods", null)

default_node_pool_name = lookup(local.cfg, "default_node_pool_name", "default")
default_node_pool_type = lookup(local.cfg, "default_node_pool_type", "VirtualMachineScaleSets")

Expand Down
11 changes: 11 additions & 0 deletions azurerm/cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ module "cluster" {

dns_prefix = local.dns_prefix

vnet_address_space = local.vnet_address_space
subnet_address_prefixes = local.subnet_address_prefixes
subnet_service_endpoints = local.subnet_service_endpoints

network_plugin = local.network_plugin
network_policy = local.network_policy
service_cidr = local.service_cidr
dns_service_ip = local.dns_service_ip
pod_cidr = local.pod_cidr
max_pods = local.max_pods

default_node_pool_name = local.default_node_pool_name
default_node_pool_type = local.default_node_pool_type

Expand Down
3 changes: 3 additions & 0 deletions azurerm/cluster/output.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
output "aks_vnet" {
value = module.cluster.aks_vnet
}
3 changes: 3 additions & 0 deletions quickstart/src/configurations/aks/clusters.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@ module "aks_zero" {
source = "github.com/kbst/terraform-kubestack//azurerm/cluster?ref={{version}}"

configuration = var.clusters["aks_zero"]

# vnet_subnet_id = azurerm_subnet.external.id # uncomment and populate with an externally-created
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Up to now, each cluster module has only the three common module attributes.

All additional configuration is wrapped inside the configuration attribute, which allows the inheritance between the environments.

By adding it here, you can reference the ID from another TF resource, but lose the ability to leverage inheritance to have different IDs per environment.

I generally err on the side of keeping the API of the provider specific cluster modules the same.

What options do we have?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was not happy with adding this attribute either, and first tried to see if there was a way to retrieve the external subnet id dynamically into the aks_zero map, but it seems Terraform doesn't allow anything other than literals in config.auto.tfvars.

With the current approach, it is possible to include strategic use of ${terraform.workspace} in the azurerm_subnet.external definition to ensure a separate vnet per environment, but that requires the user to understand what's needed and how it will interact with kubestack.

Perhaps moving the vnet definition into the _modules/aks folder would be a solution, but the trade-off would be that some of the reason I wanted to define the vnet outside of kubestack and pass it in somehow is that I may also want to add service endpoints or delegations to the vnet or subnet to allow access to other Azure-specific resources, such as Key Vaults, Container Registries, CosmosDB, etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However, it'd certainly be possible to include a service_endpoints key in config.auto.tfvars to populate the same key in the subnet. Is there any concern about this change adding so many optional-but-needed-for-CNI keys to config.auto.tfvars?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Defining the vnet inside the module seems like it will limit users too much. So configuring it outside and passing the ID in seems the best approach based on what we know so far and is what you implemented so far. Maybe a good compromise is to simply move the attribute inside the configuration with the other ones for now and then document that it is important to overwrite this per environment.

This is a limitation of the current way how the inheritance is implemented. I think there may be better ways, this was built pre 0.12. But I haven't looked into it and it would certainly be a bigger change so it should be tackled in its own PR.

What do you think about moving it into the configuration for now. Then I can start testing this and if everything is fine I can merge it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you suggest moving the attribute inside the configuration, what attribute are you referring to specifically? If you mean the vnet_subnet_id, it's referring to a value that is not known until the resources are created. I get the following when I try to point to that ID within config.auto.tfvars:

image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the value would need to be the hardcoded string of the id inside the tfvars file. It can't be a reference, that's what the error says.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the vision I've been pursuing is one where I can maintain my infrastructure entirely with terraform, which includes PaaS data services like CosmosDB or MSSQL, as well as Virtual Network Gateways to provide access to on-premises resources. For these resources external to Kubestack, my intention was to maintain the apps/ops/loc workspaces, and ensure that the external resources are created so that they are isolated appropriately in the various environments. Putting the subnet ID in config means that at best, the external resources have to live in a separate terraform module/project, and cannot be deployed in a single terraform apply with the rest of the kubestack resources.

Given this, I would be more interested in moving the custom vnet into the module and exposing it via outputs to allow users to associate it to other PaaS resources, if needed/desired.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am fine with that too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have made this update to the PR. Configuration variables now contain a set of variables that are used to create a vnet and subnet, and to pass them into the azurerm_kubernetes_cluster resource appropriately.

I also engaged with Microsoft support with a question on the behavior of the NSG, and learned that when using Advanced Networking in a cluster, the NSG gets associated with the VM Scale Set's NICs, rather than the subnet generally. This meant that we can safely remove the work-around script that was present in previous iterations of this PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds great, I'll take a look tomorrow.

# subnet's ID when using CNI/advanced networking
}
12 changes: 12 additions & 0 deletions quickstart/src/configurations/aks/config.auto.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@ clusters = {

# The Azure resource group to use
resource_group = ""

# CNI/Advanced networking configuration parameters.
# Leave commented for default 'kubenet' networking
# vnet_address_space = "10.16.0.0/12" # accepts multiple comma-separated values
# subnet_address_prefixes = "10.18.0.0/16" # accepts multiple comma-separated values
# subnet_service_endpoints = null # accepts multiple comma-separated values

# network_plugin = "azure"
# network_policy = "azure"
# service_cidr = "10.0.0.0/16"
# dns_service_ip = "10.0.0.10"
# max_pods = 30
}

# ops environment, inherrits from apps
Expand Down