_has_link() in lib/corosync.py returns the wrong status when the link state is Unknown
freishutz opened this issue · comments
Arthur Novik commented
on the host side:
[root@sfa7990-c0 ~]# ip link set up dev enp0s20f0u8u2c2 ; echo $?
0
[root@sfa7990-c0 ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno5: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:01:ff:0d:00:40 brd ff:ff:ff:ff:ff:ff
inet 10.36.44.22/22 brd 10.36.47.255 scope global dynamic eno5
valid_lft 13617sec preferred_lft 13617sec
inet6 fe80::201:ffff:fe0d:40/64 scope link
valid_lft forever preferred_lft forever
3: eno6: <BROADCAST,MULTICAST> mtu 1500 qdisc mq state DOWN group default qlen 1000
link/ether 00:01:ff:4d:00:40 brd ff:ff:ff:ff:ff:ff
4: enp0s20f0u8u2c2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UNKNOWN group default qlen 1000
link/ether 3a:18:3b:7c:53:19 brd ff:ff:ff:ff:ff:ff
inet6 fe80::3818:3bff:fe7c:5319/64 scope link
valid_lft forever preferred_lft forever
5: ib0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc mq state UP group default qlen 256
link/infiniband 00:00:07:f9:fe:80:00:00:00:00:00:00:50:6b:4b:03:00:23:b7:cc brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
inet 172.172.172.22/24 brd 172.172.172.255 scope global ib0
valid_lft forever preferred_lft forever
inet6 fe80::526b:4b03:23:b7cc/64 scope link
valid_lft forever preferred_lft forever
6: ib1: <BROADCAST,MULTICAST> mtu 4092 qdisc mq state DOWN group default qlen 256
link/infiniband 00:00:11:b0:fe:80:00:00:00:00:00:00:50:6b:4b:03:00:23:b7:cd brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
[root@sfa7990-c0 ~]# cat /sys/class/net/enp0s20f0u8u2c2/carrier
1
[root@sfa7990-c0 ~]# cat /sys/class/net/enp0s20f0u8u2c2/operstate
unknown
[root@sfa7990-c1 ~]# for nic in $(ls -1 /sys/class/net/); do echo $nic: $(readlink /sys/class/net/$nic/device/driver); done
eno5: ../../../../bus/pci/drivers/igb
eno6: ../../../../bus/pci/drivers/igb
enp0s20f0u8u2c2: ../../../../../../../bus/usb/drivers/cdc_ether
ib0: ../../../../bus/pci/drivers/mlx5_core
ib1: ../../../../bus/pci/drivers/mlx5_core
lo:
[root@sfa7990-c1 ~]# cat /sys/class/net/*/device/interface
CDC Notification Interface
[root@sfa7990-c1 ~]# modinfo cdc_ether
filename: /lib/modules/3.10.0-957.el7_lustre.x86_64/kernel/drivers/net/usb/cdc_ether.ko.xz
license: GPL
description: USB CDC Ethernet devices
author: David Brownell
retpoline: Y
rhelversion: 7.6
srcversion: D329B19ACE6E9677F544BB8
[root@sfa7990-c0 tmp]# python
Python 2.7.5 (default, Oct 30 2018, 23:45:53)
[GCC 4.8.5 20150623 (Red Hat 4.8.5-36)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import array, struct, fcntl, socket
>>> SIOCETHTOOL = 0x8946
>>> ETHTOOL_GLINK = 0x0000000a
>>> sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
>>> struct
struct
>>> ecmd = array.array('B', struct.pack('2I', ETHTOOL_GLINK, 0))
>>> ifreq = struct.pack('16sP', 'enp0s20f0u8u2c2', ecmd.buffer_info()[0])
>>> fcntl.ioctl(sock.fileno(), SIOCETHTOOL, ifreq)
'enp0s20f0u8u2c2\x000\x14J\x01\x00\x00\x00\x00'
>>> sock.close()
>>> struct.unpack('4xI', ecmd.tostring())[0]
1
>>> def _has_link(name):
... import array
... import struct
... import fcntl
... import socket
... SIOCETHTOOL = 0x8946
... ETHTOOL_GLINK = 0x0000000a
... sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
... ecmd = array.array('B', struct.pack('2I', ETHTOOL_GLINK, 0))
... ifreq = struct.pack('16sP', name, ecmd.buffer_info()[0])
... fcntl.ioctl(sock.fileno(), SIOCETHTOOL, ifreq)
... sock.close()
... return bool(struct.unpack('4xI', ecmd.tostring())[0])
...
>>>
>>>
>>>
>>> _has_link('ib0')
True
>>> _has_link('ib1')
False
>>> _has_link('lo')
True
>>> _has_link('enp0s20f0u8u2c2')
True
According to https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-class-net, an interface can have the following states:
What: /sys/class/net/<iface>/operstate
Date: March 2006
KernelVersion: 2.6.17
Contact: netdev@vger.kernel.org
Description:
Indicates the interface RFC2863 operational state as a string.
Possible values are:
"unknown", "notpresent", "down", "lowerlayerdown", "testing",
"dormant", "up".
I suggest changing the code logic so that every state except 'up' returns False.
Joe Grund commented
@freishutz Can we get access to a system where this is occurring?
Will Johnson commented
Hi @freishutz, I may have missed this but is there a system setup where I can get access to this? Thanks.
Will Johnson commented
Hi @freishutz, I have put together a fix that I believe will resolve this issue. Would it be possible to connect to the same cluster so I can test with the new rpm? I'm also attaching the binary to this ticket in case you want to try it out as well.
Will Johnson commented
Will Johnson commented