File: //proc/self/root/usr/share/netplan/netplan_cli/cli/sriov.py
#!/usr/bin/python3
#
# Copyright (C) 2020-2022 Canonical, Ltd.
# Author: Łukasz 'sil2100' Zemczak <lukasz.zemczak@canonical.com>
# Author: Lukas Märdian <slyon@ubuntu.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
import json
import logging
import os
import subprocess
import typing
from typing import Dict, List, Optional, Set
from . import utils
from ..configmanager import ConfigurationError
import netplan
# PCIDevice class originates from mlnx_switchdev_mode/sriovify.py
# Copyright 2019 Canonical Ltd, Apache License, Version 2.0
# https://github.com/openstack-charmers/mlnx-switchdev-mode
class PCIDevice(object):
    """Thin wrapper around the sysfs entry of a single PCI device"""

    def __init__(self, pci_addr: str):
        """Create a handler for one PCI device
        :param pci_addr: PCI address of the device
        :type: str
        """
        self.pci_addr = pci_addr

    @property
    def sys(self) -> str:
        """Root of the sysfs tree (a property so tests can override it)
        :return: full path to the /sys filesystem
        :rtype: str
        """
        return "/sys"

    @property
    def path(self) -> str:
        """Directory of this PCI device below the sysfs root
        :return: full path to the PCI device in the /sys filesystem
        :rtype: str
        """
        devices_root = os.path.join(self.sys, "bus/pci/devices")
        return os.path.join(devices_root, self.pci_addr)

    def subpath(self, subpath: str) -> str:
        """Build a path below the device directory
        :param subpath: entry name relative to the device directory
        :type: str
        :return: self.path joined with subpath
        :rtype: str
        """
        return os.path.join(self.path, subpath)

    @property
    def driver(self) -> str:
        """Name of the kernel driver the device is bound to
        :return: basename of the 'driver' link target, '' if there is none
        :rtype: str
        """
        driver_link = self.subpath("driver")
        if not os.path.exists(driver_link):
            return ''
        return os.path.basename(os.readlink(driver_link))

    @property
    def bound(self) -> bool:
        """Tell whether a kernel driver is attached to the device
        :return: True if the 'driver' entry exists for the device
        :rtype: bool
        """
        driver_link = self.subpath("driver")
        return os.path.exists(driver_link)

    @property
    def is_pf(self) -> bool:
        """Tell whether the device is a SR-IOV Physical Function
        :return: True if the device exposes 'sriov_numvfs'
        :rtype: bool
        """
        numvfs_entry = self.subpath("sriov_numvfs")
        return os.path.exists(numvfs_entry)

    @property
    def is_vf(self) -> bool:
        """Tell whether the device is a SR-IOV Virtual Function
        :return: True if the device exposes 'physfn'
        :rtype: bool
        """
        physfn_entry = self.subpath("physfn")
        return os.path.exists(physfn_entry)

    @property
    def vf_addrs(self) -> list:
        """PCI addresses of the Virtual Functions of a Physical Function
        :return: addresses taken from virtfn0, virtfn1, ... in index order
        :rtype: list[str]
        """
        addresses = []
        index = 0
        # the virtfnN links are numbered consecutively; the first missing
        # index marks the end of the list
        while True:
            try:
                target = os.readlink(self.subpath(f"virtfn{index}"))
            except FileNotFoundError:
                return addresses
            addresses.append(os.path.basename(target))
            index += 1

    @property
    def vfs(self) -> list:
        """Virtual Functions of a Physical Function as device objects
        :return: one PCIDevice per entry of vf_addrs
        :rtype: list[PCIDevice]
        """
        return list(map(PCIDevice, self.vf_addrs))

    def devlink_set(self, obj_name: str, prop: str, value: str):
        """Change a devlink option of the PCI device
        :param obj_name: devlink object to set options on
        :type: str
        :param prop: property to set
        :type: str
        :param value: value to set for property
        :type: str
        """
        handle = "pci/{}".format(self.pci_addr)
        command = ["/sbin/devlink", "dev", obj_name, "set", handle, prop, value]
        subprocess.check_call(command)

    def devlink_eswitch_mode(self) -> str:
        """Ask devlink for the current eswitch mode of the PCI device
        :return: the eswitch mode or '__undetermined' if it can't be retrieved
        :rtype: str
        """
        undetermined = '__undetermined'
        handle = f"pci/{self.pci_addr}"
        command = ["/sbin/devlink", "-j", "dev", "eswitch", "show", handle]
        try:
            raw = subprocess.check_output(command, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return undetermined
        # With a known mode the JSON document looks like
        # {"dev":{"pci/0000:03:00.0":{"mode":"switchdev"}}}
        # and without one it is just
        # {"dev":{}}
        devices = json.loads(raw).get("dev", {})
        return devices.get(handle, {}).get('mode', undetermined)

    def __str__(self) -> str:
        """String representation of the object
        :return: PCI address of the device
        :rtype: str
        """
        return self.pci_addr
def bind_vfs(vfs: typing.Iterable[PCIDevice], driver):
    """Attach every VF that currently has no driver to the given driver.

    The PCI address of each unbound VF is written to the 'bind' file of
    the driver below /sys/bus/pci/drivers. VFs that already report a
    driver are skipped.

    :param vfs: VF devices to look at
    :param driver: name of the kernel driver to bind to
    :return: list of the VFs that were written to the bind file
    """
    newly_bound = []
    for device in vfs:
        if device.bound:
            continue
        bind_file = f"/sys/bus/pci/drivers/{driver}/bind"
        with open(bind_file, "wt") as handle:
            handle.write(device.pci_addr)
            newly_bound.append(device)
    return newly_bound
def unbind_vfs(vfs: typing.Iterable[PCIDevice], driver) -> typing.Iterable[PCIDevice]:
    """Detach every VF that currently has a driver from the given driver.

    The PCI address of each bound VF is written to the 'unbind' file of
    the driver below /sys/bus/pci/drivers. VFs without a driver are
    skipped.

    :param vfs: VF devices to look at
    :param driver: name of the kernel driver to unbind from
    :return: list of the VFs that were written to the unbind file
    """
    released = []
    for device in vfs:
        if not device.bound:
            continue
        unbind_file = f"/sys/bus/pci/drivers/{driver}/unbind"
        with open(unbind_file, "wt") as handle:
            handle.write(device.pci_addr)
            released.append(device)
    return released
def _interface_matches(netdef: netplan.NetDefinition, interface: str) -> bool:
    """
    Check a system interface against the match rules of a netdef, using the
    interface's name, driver name and MAC address.
    """
    driver_name = utils.get_interface_driver_name(interface)
    mac_address = utils.get_interface_macaddress(interface)
    return netdef._match_interface(
        iface_name=interface,
        iface_driver=driver_name,
        iface_mac=mac_address)
def _get_interface_name_for_netdef(netdef: netplan.NetDefinition) -> Optional[str]:
    """
    Try to match a netdef with the real system network interface.

    Returns the name of the matching interface, or None if no interface on
    the system corresponds to the netdef.
    Throws ConfigurationError if there is more than one match.
    """
    interfaces: List[str] = utils.get_interfaces()
    if not netdef._has_match:
        # no match field, assume entry name is the interface name
        return netdef.id if netdef.id in interfaces else None
    # now here it's a bit tricky
    set_name: str = netdef.set_name
    if set_name and set_name in interfaces:
        # if we had a match: stanza and set-name: this means we should
        # assume that, if found, the interface has already been
        # renamed - use the new name
        return set_name
    matched: Optional[str] = None
    # we walk through all the system interfaces to determine if there is
    # more than one matched interface
    for interface in interfaces:
        if not _interface_matches(netdef, interface):
            continue
        # we have a matching PF
        # error out as soon as a second, different interface matches; the
        # check has to look at what was matched *before* recording the
        # current interface, otherwise two matches slip through unnoticed
        if matched is not None and matched != interface:
            raise ConfigurationError('matched more than one interface for a PF device: %s' % netdef.id)
        matched = interface
    return matched
def _get_pci_slot_name(netdev):
    """
    Read PCI slot name for given interface name

    The value is taken from the PCI_SLOT_NAME= line of the interface's
    device/uevent file below /sys/class/net.

    :param netdev: name of the network interface
    :return: the PCI slot name, or None if the uevent file has no
        PCI_SLOT_NAME entry
    :raises RuntimeError: if the uevent file cannot be opened or read
    """
    uevent_path = os.path.join('/sys/class/net', netdev, 'device/uevent')
    try:
        with open(uevent_path) as f:
            for line in f:
                # split on the first '=' only, so that everything after it
                # is kept as the value
                key, sep, value = line.strip().partition('=')
                if sep and key == 'PCI_SLOT_NAME':
                    return value
    except IOError as e:
        raise RuntimeError('failed parsing PCI slot name for %s: %s' % (netdev, str(e))) from e
    return None
def _get_physical_functions(np_state: netplan.State) -> Dict[str, str]:
    """
    Collect the netplan ethernet definitions that act as SR-IOV PFs and map
    their netdef IDs to the system interface they currently match.

    A definition counts as a PF if another definition links to it via
    'sriov', if it sets an embedded switch mode, or if it has a positive
    VF count. PFs without a matching system interface are left out.
    """
    found = {}
    for netdef in np_state.ethernets.values():
        pf_link = netdef.links.get('sriov')
        if pf_link:
            # a 'sriov' link marks this netdef as a VF; the link target is
            # the PF
            pf_iface = _get_interface_name_for_netdef(np_state[pf_link.id])
            if pf_iface:
                found[pf_link.id] = pf_iface
            continue
        # A netdef that sets the embedded switch mode is treated as a PF,
        # which allows changing the eswitch mode even without any VFs.
        if netdef._embedded_switch_mode:
            pf_iface = _get_interface_name_for_netdef(netdef)
            if pf_iface:
                found[netdef.id] = pf_iface
        # Any positive number of VFs also means the netdef is a PF
        try:
            requested = netdef._vf_count
        except netplan.NetplanException as e:
            raise ConfigurationError(str(e))
        if requested > 0:
            pf_iface = _get_interface_name_for_netdef(netdef)
            if pf_iface:
                found[netdef.id] = pf_iface
    return found
def _get_vf_number_per_pf(np_state: netplan.State) -> Dict[str, int]:
    """
    Map every PF system interface to the number of VFs it should provide.

    The number comes from netdef._vf_count, which ultimately calls
    _netplan_state_get_vf_count_for_def from libnetplan and returns
    MAX(sriov_explicit_vf_count, number of VF netdefs). Definitions with
    no VFs, or without a matching system interface, are skipped.
    """
    per_pf = {}
    for netdef in np_state.ethernets.values():
        try:
            requested = netdef._vf_count
        except netplan.NetplanException as e:
            raise ConfigurationError(str(e))
        if requested <= 0:
            continue
        pf_iface = _get_interface_name_for_netdef(netdef)
        if pf_iface:
            per_pf[pf_iface] = requested
    return per_pf
def _get_virtual_functions(np_state: netplan.State) -> Set[str]:
    """
    Return the IDs of the netplan ethernet definitions that are virtual
    functions.

    A definition is kept if it carries a 'sriov' link and the PF it links
    to matches an interface present on the system.
    """
    return {
        netdef.id
        for netdef in np_state.ethernets.values()
        if (pf_link := netdef.links.get('sriov'))
        and _get_interface_name_for_netdef(np_state[pf_link.id])
    }
def set_numvfs_for_pf(pf, vf_count):
    """
    Allocate the required number of VFs for the selected PF.

    The requested count is checked against the fixed limit of 256 and
    against the PF's sriov_totalvfs before it is written to sriov_numvfs.
    If that write fails with errno 16 (device or resource busy) the count
    is first reset to 0 and then written again.

    :param pf: interface name of the PF
    :param vf_count: number of VFs to allocate
    :return: True once the value has been written
    :raises ConfigurationError: if vf_count exceeds 256 or sriov_totalvfs
    :raises RuntimeError: if sriov_totalvfs cannot be read or parsed, or
        if sriov_numvfs cannot be written
    """
    if vf_count > 256:
        raise ConfigurationError(
            'cannot allocate more VFs for PF %s than the SR-IOV maximum: %s > 256' % (pf, vf_count))
    device_dir = os.path.join('/sys/class/net', pf, 'device')
    total_file = os.path.join(device_dir, 'sriov_totalvfs')
    num_file = os.path.join(device_dir, 'sriov_numvfs')

    def _write_numvfs(text):
        # one open/close cycle per value written
        with open(num_file, 'w') as handle:
            handle.write(text)

    try:
        with open(total_file) as handle:
            supported = int(handle.read().strip())
    except IOError as e:
        raise RuntimeError('failed parsing sriov_totalvfs for %s: %s' % (pf, str(e)))
    except ValueError:
        raise RuntimeError('invalid sriov_totalvfs value for %s' % pf)
    if vf_count > supported:
        raise ConfigurationError(
            'cannot allocate more VFs for PF %s than supported: %s > %s (sriov_totalvfs)' % (pf, vf_count, supported))
    try:
        _write_numvfs(str(vf_count))
    except IOError as first_error:
        failure = first_error
        if first_error.errno == 16:  # device or resource busy
            logging.warning('device or resource busy while setting sriov_numvfs for %s, trying workaround' % pf)
            try:
                # two separate open/close sequences, so that this is as
                # close to writing via shell as possible
                _write_numvfs('0')
                _write_numvfs(str(vf_count))
            except IOError as retry_error:
                failure = retry_error
            else:
                return True
        raise RuntimeError('failed setting sriov_numvfs to %s for %s: %s' % (vf_count, pf, str(failure)))
    return True
def perform_hardware_specific_quirks(pf):
    """
    Perform any hardware-specific quirks for the given SR-IOV device to make
    sure all the VF-count changes are applied.

    The device is identified by '<vendor>:<device>', built from the
    'vendor' and 'device' files of the interface's device directory. The
    list of devices needing quirks is currently empty, so nothing is done
    beyond reading the IDs.

    :param pf: interface name of the PF
    :raises RuntimeError: if the vendor or device ID cannot be read
    """
    devdir = os.path.join('/sys/class/net', pf, 'device')
    try:
        # the first two characters of each file's content are dropped
        # (the '0x' prefix in front of the hexadecimal ID)
        with open(os.path.join(devdir, 'vendor')) as f:
            vendor_id = f.read().strip()[2:]
        with open(os.path.join(devdir, 'device')) as f:
            device_id = f.read().strip()[2:]
    except IOError as e:
        raise RuntimeError('could not determine vendor and device ID of %s: %s' % (pf, str(e))) from e
    combined_id = ':'.join([vendor_id, device_id])
    quirk_devices = ()  # TODO: add entries to the list
    if combined_id in quirk_devices:  # pragma: nocover (empty quirk_devices)
        # some devices need special handling, so this is the place
        # Currently this part is empty, but has been added as a preemptive
        # measure, as apparently a lot of SR-IOV cards have issues with
        # dynamically allocating VFs. Some cards seem to require a full
        # kernel module reload cycle after changing the sriov_numvfs value
        # for the changes to come into effect.
        # Any identified card/vendor can then be special-cased here, if
        # needed.
        pass
def apply_vlan_filter_for_vf(pf, vf, vlan_name, vlan_id, prefix='/'):
    """
    Apply the hardware VLAN filtering for the selected VF.

    :param pf: interface name of the PF the VF belongs to
    :param vf: interface name of the VF
    :param vlan_name: name of the VLAN, only used in error messages
    :param vlan_id: VLAN ID to filter on
    :param prefix: root below which sys/class/net is looked up; it exists
        for unit testing purposes only
    :raises RuntimeError: if the VF index cannot be determined or the
        'ip link set' command fails
    """
    # Knowing the VF interface name is not enough here: the ip tooling
    # wants the index of the VF on its PF. That index is recovered by
    # finding the PF's virtfnN link that points at the VF's own device.
    net_root = os.path.join(prefix, 'sys/class/net')
    vf_device_link = os.path.join(net_root, vf, 'device')
    vf_pci_id = os.path.basename(os.readlink(vf_device_link))
    pf_device_dir = os.path.join(net_root, pf, 'device')
    vf_index = None
    for entry in os.listdir(pf_device_dir):
        if 'virtfn' not in entry:
            continue
        target = os.readlink(os.path.join(pf_device_dir, entry))
        if os.path.basename(target) == vf_pci_id:
            # whatever follows the six characters of 'virtfn' is the index
            vf_index = entry[6:]
            break
    if not vf_index:
        raise RuntimeError(
            'could not determine the VF index for %s while configuring vlan %s' % (vf, vlan_name))
    # now, create the VLAN filter
    # TODO: would be best if we did this directly via python, without
    #  calling the iproute tooling
    command = ['ip', 'link', 'set',
               'dev', pf,
               'vf', vf_index,
               'vlan', str(vlan_id)]
    try:
        subprocess.check_call(command,
                              stdout=subprocess.DEVNULL,
                              stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError:
        raise RuntimeError(
            'failed setting SR-IOV VLAN filter for vlan %s (ip link set command failed)' % vlan_name)
def apply_sriov_config(config_manager, rootdir='/'):
    """
    Go through all interfaces, identify which ones are SR-IOV VFs, create
    them and perform all other necessary setup.

    The steps are, in order: parse the configuration, allocate the
    requested number of VFs on every PF, match the VF definitions to system
    interfaces, switch the embedded switch mode of PFs where the
    configuration asks for a different one, and apply the hardware VLAN
    filters for VLANs marked as SR-IOV filtered.

    :param config_manager: object whose parse() is called first and whose
        np_state attribute then provides the parsed netplan state
    :param rootdir: not referenced anywhere in this function body
    :raises ConfigurationError: if a VF definition matches more than one
        interface or an interface would get a second SR-IOV VLAN; the
        helpers called from here can raise it as well
    :raises RuntimeError: propagated from the sysfs and 'ip' helpers
    """
    config_manager.parse()
    interfaces = utils.get_interfaces()
    np_state = config_manager.np_state
    # for sr-iov devices, we identify VFs by them having a link: field
    # pointing to an PF. So let's browse through all ethernet devices,
    # find all that are VFs and count how many of those are linked to
    # particular PFs, as we need to then set the numvfs for each.
    vf_counts = _get_vf_number_per_pf(np_state)
    # we also store all matches between VF/PF netplan entry names and
    # interface that they're currently matching to
    vfs_set = _get_virtual_functions(np_state)
    pfs = _get_physical_functions(np_state)
    # setup the required number of VFs per PF
    # at the same time store which PFs got changed in case the NICs
    # require some special quirks for the VF number to change
    vf_count_changed = []
    if vf_counts:
        for pf, vf_count in vf_counts.items():
            # a falsy return value means this PF is not recorded as changed
            if not set_numvfs_for_pf(pf, vf_count):
                continue
            vf_count_changed.append(pf)
    if vf_count_changed:
        # some cards need special treatment when we want to change the
        # number of enabled VFs
        for pf in vf_count_changed:
            perform_hardware_specific_quirks(pf)
        # also, since the VF number changed, the interfaces list also
        # changed, so we need to refresh it
        interfaces = utils.get_interfaces()
    # now in theory we should have all the new VFs set up and existing;
    # this is needed because we will have to now match the defined VF
    # entries to existing interfaces, otherwise we won't be able to set
    # filtered VLANs for those.
    # XXX: does matching those even make sense?
    # vfs maps the netplan ID of a VF definition to its system interface
    vfs = {}
    for vf in vfs_set:
        netdef = np_state[vf]
        if netdef._has_match:
            # right now we only match by name, as I don't think matching per
            # driver and/or macaddress makes sense
            # TODO: print warning if other matches are provided
            for interface in interfaces:
                if netdef._match_interface(iface_name=interface):
                    if vf in vfs and vfs[vf]:
                        raise ConfigurationError('matched more than one interface for a VF device: %s' % vf)
                    vfs[vf] = interface
        else:
            # without a match stanza the netplan ID is the interface name
            if vf in interfaces:
                vfs[vf] = vf
    # Walk the SR-IOV PFs and check if we need to change the eswitch mode
    for netdef_id, iface in pfs.items():
        netdef = np_state[netdef_id]
        eswitch_mode = netdef._embedded_switch_mode
        # any other value (including an unset mode) leaves the PF untouched
        if eswitch_mode in ['switchdev', 'legacy']:
            pci_addr = _get_pci_slot_name(iface)
            pcidev = PCIDevice(pci_addr)
            current_eswitch_mode_system = pcidev.devlink_eswitch_mode()
            if eswitch_mode != current_eswitch_mode_system:
                if pcidev.is_pf:
                    logging.debug("Found VFs of {}: {}".format(pcidev, pcidev.vf_addrs))
                    # the VFs are unbound from the PF's driver before the
                    # mode is changed; a failure to unbind is only logged
                    # and the mode change is attempted anyway
                    if pcidev.vfs:
                        try:
                            unbind_vfs(pcidev.vfs, pcidev.driver)
                        except Exception as e:
                            logging.warning(f'Unbinding of VFs for {netdef_id} failed: {str(e)}')
                    logging.debug(f'Changing eswitch mode from {current_eswitch_mode_system} to {eswitch_mode} for: {netdef_id}')
                    pcidev.devlink_set('eswitch', 'mode', eswitch_mode)
                    # pcidev.vfs is a property and is evaluated again here,
                    # after the mode change; the VFs are bound back unless
                    # the netdef asks for the rebind to be delayed
                    if pcidev.vfs:
                        if not netdef._delay_virtual_functions_rebind:
                            bind_vfs(pcidev.vfs, pcidev.driver)
    # VF interfaces that already received a hardware VLAN filter in this run
    filtered_vlans_set = set()
    for vlan, netdef in np_state.vlans.items():
        # there is a special sriov vlan renderer that one can use to mark
        # a selected vlan to be done in hardware (VLAN filtering)
        if netdef._has_sriov_vlan_filter:
            # this only works for SR-IOV VF interfaces
            link = netdef.links.get('vlan')
            vlan_id = netdef._vlan_id
            vf = vfs.get(link.id)
            if not vf:
                # it is possible this is not an error, for instance when
                # the configuration has been defined 'for the future'
                # XXX: but maybe we should error out here as well?
                logging.warning(
                    'SR-IOV vlan defined for %s but link %s is either not a VF or has no matches' % (vlan, link.id))
                continue
            # get the parent pf interface
            # first we fetch the related vf netplan entry
            # and finally, get the matched pf interface
            pf = pfs.get(link.links.get('sriov').id)
            if vf in filtered_vlans_set:
                raise ConfigurationError(
                    'interface %s for netplan device %s (%s) already has an SR-IOV vlan defined' % (vf, link.id, vlan))
            # TODO: make sure that we don't apply the filter twice
            apply_vlan_filter_for_vf(pf, vf, vlan, vlan_id)
            filtered_vlans_set.add(vf)