Compare commits
7 Commits
e5a0e2352d
...
eadcf6f296
Author | SHA1 | Date |
---|---|---|
Gregor Michels | eadcf6f296 | |
Gregor Michels | 7de03e6cd6 | |
Gregor Michels | 4904933475 | |
Gregor Michels | 2299e3aff1 | |
Gregor Michels | d1c1f34bf8 | |
Gregor Michels | 14df3e24df | |
Gregor Michels | d7206111fa |
|
@ -1474,3 +1474,145 @@ Tue Feb 28 08:44:16 2023 daemon.warn dnsmasq[1]: Maximum number of concurrent DN
|
||||||
* changed upstream dns to `9.9.9.9` (quad9) and `1.1.1.1` (cloudflare)
|
* changed upstream dns to `9.9.9.9` (quad9) and `1.1.1.1` (cloudflare)
|
||||||
|
|
||||||
see `a236643` for details
|
see `a236643` for details
|
||||||
|
|
||||||
|
|
||||||
|
041 2023.03.11 19:20 - 2023.03.13 20:30 (ADP) | broken management vpn tunnel
|
||||||
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
```
|
||||||
|
root@gw-core01:~# date
|
||||||
|
Mon Mar 13 19:40:48 2023
|
||||||
|
root@gw-core01:~# wg
|
||||||
|
interface: wg0
|
||||||
|
public key: 1lYOjFZBY4WbaVmyWFuesVbgfFrfqDTnmAIrXTWLkh4=
|
||||||
|
private key: (hidden)
|
||||||
|
listening port: 51820
|
||||||
|
|
||||||
|
peer: 9j6aZs+ViG9d9xw8AofRo10FPosW6LpDIv0IHtqP4UM=
|
||||||
|
preshared key: (hidden)
|
||||||
|
endpoint: 162.55.53.85:51820
|
||||||
|
allowed ips: 0.0.0.0/0
|
||||||
|
latest handshake: 1 day, 23 hours, 55 minutes, 49 seconds ago
|
||||||
|
transfer: 1.17 GiB received, 16.71 GiB sent
|
||||||
|
persistent keepalive: every 15 seconds
|
||||||
|
root@gw-core01:~# ifdown wg0
|
||||||
|
root@gw-core01:~# ifup wg0
|
||||||
|
root@gw-core01:~# echo wg0 still not handshaking properly
|
||||||
|
root@gw-core01:~# uci delete network.wg0.listen_port
|
||||||
|
root@gw-core01:~# /etc/init.d/network reload
|
||||||
|
root@gw-core01:~# echo wg0 is up again !
|
||||||
|
root@gw-core01:~# uci commit network
|
||||||
|
```
|
||||||
|
|
||||||
|
042 2023.03.12 18:00 - 2023.03.22 19:30 (RGS) | `ap-1374` (`kitchen-og`) down
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
`ap-1374` has been (mostly) down since 2023.03.12 18:00.
|
||||||
|
Neither the Ethernet link nor PoE is coming up.
|
||||||
|
```
|
||||||
|
user@freifunk-admin:~$ date && ssh sax-rgs-sw-access02
|
||||||
|
Wed 15 Mar 2023 12:07:55 AM CET
|
||||||
|
[...]
|
||||||
|
sax-rgs-sw-access02# show logging buffered
|
||||||
|
|
||||||
|
Log messages in buffer
|
||||||
|
[...]
|
||||||
|
5;Feb 17 2000 05:37:36;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to down
|
||||||
|
4;Feb 17 2000 05:37:37;%TRUNK-4-INFO: Power-Over-Ethernet on gi0/7 Powered Down!
|
||||||
|
4;Feb 17 2000 05:37:48;%TRUNK-4-INFO: Power-Over-Ethernet on gi0/7: Detected Standard PD, Delivering power!
|
||||||
|
5;Feb 17 2000 05:37:54;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to up
|
||||||
|
5;Feb 17 2000 05:38:26;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to down
|
||||||
|
5;Feb 17 2000 05:38:28;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to up
|
||||||
|
5;Feb 17 2000 05:38:32;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to down
|
||||||
|
5;Feb 17 2000 05:38:35;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to up
|
||||||
|
5;Feb 17 2000 05:38:38;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to down
|
||||||
|
5;Feb 17 2000 05:38:59;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to up
|
||||||
|
5;Feb 20 2000 10:02:32;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/6, changed state to down
|
||||||
|
5;Feb 20 2000 10:02:35;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/6, changed state to up
|
||||||
|
5;Feb 24 2000 22:50:15;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to down
|
||||||
|
5;Feb 24 2000 22:50:15;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to up
|
||||||
|
5;Feb 24 2000 22:50:15;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to down
|
||||||
|
4;Feb 24 2000 22:50:18;%TRUNK-4-INFO: Power-Over-Ethernet on gi0/7 Powered Down!
|
||||||
|
5;Feb 25 2000 13:57:06;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/6, changed state to down
|
||||||
|
5;Feb 25 2000 13:57:09;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/6, changed state to up
|
||||||
|
4;Feb 26 2000 21:52:17;%TRUNK-4-INFO: Power-Over-Ethernet on gi0/7: Detected Standard PD, Delivering power!
|
||||||
|
5;Feb 26 2000 21:52:22;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to up
|
||||||
|
5;Feb 26 2000 21:52:54;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to down
|
||||||
|
5;Feb 26 2000 21:52:57;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to up
|
||||||
|
5;Feb 26 2000 21:53:01;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to down
|
||||||
|
5;Feb 26 2000 21:53:03;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to up
|
||||||
|
5;Feb 26 2000 21:53:06;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to down
|
||||||
|
5;Feb 26 2000 21:53:26;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to up
|
||||||
|
5;Feb 27 2000 00:31:56;%LINEPROTO-5-UPDOWN: Line protocol on GigabitEthernet0/7, changed state to down
|
||||||
|
4;Feb 27 2000 00:31:57;%TRUNK-4-INFO: Power-Over-Ethernet on gi0/7 Powered Down!
|
||||||
|
5;Feb 27 2000 05:24:48;%AAA-5-LOGIN: New ssh connection for user admin, source 10.86.254.0 ACCEPTED
|
||||||
|
6;Feb 27 2000 05:25:00;%AAA-6-INFO: User 'admin' enter privileged mode from ssh with level '15' success
|
||||||
|
sax-rgs-sw-access02# show clock
|
||||||
|
2000-02-27 05:43:37 Coordinated(UTC+0)
|
||||||
|
```
|
||||||
|
|
||||||
|
**needed fix**:
|
||||||
|
* check keystone modules on site
|
||||||
|
* also check module for `0/6` (there are some `ifInErrors`)
|
||||||
|
|
||||||
|
**additional work - set correct time on switches (done)**:
|
||||||
|
```
|
||||||
|
sax-rgs-sw-access0X> enable
|
||||||
|
sax-rgs-sw-access0X# configure terminal
|
||||||
|
sax-rgs-sw-access0X(config)# clock timezone CET +1
|
||||||
|
sax-rgs-sw-access0X(config)# clock set 00:26:15 mar 15 2023
|
||||||
|
sax-rgs-sw-access0X(config)# clock source ntp
|
||||||
|
sax-rgs-sw-access0X(config)# ntp server pool.ntp.org
|
||||||
|
sax-rgs-sw-access0X(config)# exit
|
||||||
|
sax-rgs-sw-access0X# write
|
||||||
|
```
|
||||||
|
|
||||||
|
**disable port till fix is there - done 16.03.2023 00:40**:
|
||||||
|
```
|
||||||
|
sax-rgs-sw-access02> enable
|
||||||
|
sax-rgs-sw-access02# configure terminal
|
||||||
|
sax-rgs-sw-access02(config)# interface GigabitEthernet0/7
|
||||||
|
sax-rgs-sw-access02(config-if-GigabitEthernet0/7)# no poe enable
|
||||||
|
sax-rgs-sw-access02(config-if-GigabitEthernet0/7)# exit
|
||||||
|
sax-rgs-sw-access02(config)# exit
|
||||||
|
sax-rgs-sw-access02# write
|
||||||
|
```
|
||||||
|
|
||||||
|
**actual fix - done 22.03.2023**:
|
||||||
|
* reterminate keystone modules for both links (`GigabitEthernet0/6` and `GigabitEthernet0/7`)
|
||||||
|
* reenable poe on `GigabitEthernet0/7`
|
||||||
|
* test by
|
||||||
|
* resetting link counters on `sax-rgs-sw-access02`
|
||||||
|
* `iperf3` from ap to core gateway (bidirectional)
|
||||||
|
* looking at the counters again
|
||||||
|
|
||||||
|
|
||||||
|
043 2023.03.20 01:30 | (maintenance) update eae-adp-jump01
|
||||||
|
----------------------------------------------------------
|
||||||
|
|
||||||
|
```
|
||||||
|
syspatch
|
||||||
|
pkg_add -uU
|
||||||
|
reboot
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
044 2023.03.25 23:45 - 2023.03.26 13:00 (ANS) | broken upstream
|
||||||
|
---------------------------------------------------------------
|
||||||
|
|
||||||
|
`ffl-ans-gw-core01` hasn't completed a handshake with `eae-adp-jump01` since 2023.03.25 at around 23:45.
|
||||||
|
Additionally, the facility management called and said that there was "no internet" on site.
|
||||||
|
|
||||||
|
The facility management will drive to the ANS and check in with me to talk about the next steps.
|
||||||
|
|
||||||
|
**solution**: after power cycling the gigacube, the upstream came back
|
||||||
|
|
||||||
|
|
||||||
|
045 2023.04.01 - 2023.04.02 (ANS) | fibre cut to tent-1
|
||||||
|
-------------------------------------------------------
|
||||||
|
|
||||||
|
**issue**: fibre cut from `facility management` container to `tent-1`
|
||||||
|
|
||||||
|
**solution**: replace fibre with outdoor copper cable
|
||||||
|
|
||||||
|
**discussion**:
|
||||||
|
* longterm: replace copper with fibre
|
||||||
|
|
|
@ -62,14 +62,29 @@ groups:
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
annotations:
|
annotations:
|
||||||
summary: A switch port changed it's state {{ $value }}x time
|
summary: "{{ $labels.ifName }} on {{ $labels.instance }} changed its state {{ $value }}x time(s) in the last 2 hours"
|
||||||
description: "For some reason a switch port changed it's state\n LABELS = {{ $labels }}"
|
description: "This alarm will clear in 2 hours"
|
||||||
|
|
||||||
- alert: SNMPNodeRebooted
|
- alert: PortIfInErrors
|
||||||
expr: (sysUpTime / 100) <= (60 * 60 * 2)
|
expr: increase(ifInErrors[2h]) > 0 or increase(node_network_receive_errs_total[2h]) > 0
|
||||||
for: 0m
|
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
annotations:
|
annotations:
|
||||||
summary: A snmp node rebooted in the last 2 hours (instance {{ $labels.instance }})
|
summary: "{{ if $labels.ifName }} {{ $labels.ifName }} {{ else }} {{ $labels.device }} {{ end }} on {{ $labels.instance }} has {{ $value }} ifInErrors in the last 2 hours. This alarm will clear automatically in 2 hours"
|
||||||
description: "The uptime of a snmp node changed in the last two hours. VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
description: "For some reason the port is throwing ifInErrors"
|
||||||
|
|
||||||
|
- alert: PortIfOutErrors
|
||||||
|
expr: increase(ifOutErrors[2h]) > 0 or increase(node_network_transmit_errs_total[2h]) > 0
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "{{ if $labels.ifName }} {{ $labels.ifName }} {{ else }} {{ $labels.device }} {{ end }} on {{ $labels.instance }} has {{ $value }} ifOutErrors in the last 2 hours"
|
||||||
|
description: "For some reason the port is throwing ifOutErrors. This alarm will clear automatically in 2 hours"
|
||||||
|
|
||||||
|
- alert: SNMPNodeRebooted
|
||||||
|
expr: (sysUpTime / 100) <= (60 * 60 * 2)
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
annotations:
|
||||||
|
summary: "{{ $labels.instance }} rebooted at least one time in the last two hours"
|
||||||
|
description: "This alarm will clear in 2 hours"
|
||||||
|
|
|
@ -91,7 +91,6 @@ config interface 'backoffice'
|
||||||
config interface 'wg0'
|
config interface 'wg0'
|
||||||
option proto 'wireguard'
|
option proto 'wireguard'
|
||||||
option private_key "{{ lookup('passwordstore', 'wg/wg0/gw-core01') }}"
|
option private_key "{{ lookup('passwordstore', 'wg/wg0/gw-core01') }}"
|
||||||
option listen_port 51820
|
|
||||||
option mtu 1350
|
option mtu 1350
|
||||||
list addresses '10.84.254.1/31'
|
list addresses '10.84.254.1/31'
|
||||||
|
|
||||||
|
|
Reference in New Issue