Skip to content

Commit

Permalink
🚸 对 Agent 的信息上报增加超时(可能)增加在 Windows 上的稳定性
Browse files Browse the repository at this point in the history
  • Loading branch information
naiba committed Jun 12, 2021
1 parent 6999acc commit 373b95a
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 12 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@

\>> 交流论坛:[打杂社区](https://daza.net/c/nezha) (Lemmy)

\>> QQ 交流群:872069346 **加群要求:已搭建好哪吒监控 & 有 2+ 服务器**<br>
群友互助/服务器交流,作者不答疑,找 naiba 请至论坛发帖
\>> QQ 交流群:872069346 **加群要求:已搭建好哪吒监控 & 有 2+ 服务器**

\>> [我们的用户](https://www.google.com/search?q="powered+by+哪吒监控%7C哪吒面板"&filter=0) (Google)

Expand Down Expand Up @@ -103,7 +102,7 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
- net_in_speed(入站网速)、net_out_speed(出站网速)、net_all_speed(双向网速)、transfer_in(入站流量)、transfer_out(出站流量)、transfer_all(双向流量):Min/Max 数值为字节(1KB = 1024,1MB = 1024\*1024)
- offline:不支持 Min/Max 参数
- Duration:持续秒数,监控比较简陋,取持续时间内的 70% 采样结果
- Ignore: `{"1": true, "2":false}` 忽略此规则的服务器 ID 列表
- Ignore: `{"1": true, "2":false}` 忽略此规则的服务器 ID 列表,比如忽略服务器 ID 5 的离线通知 `[{"Type":"offline","Duration":10, "Ignore":{"5": true}}]`
</details>

<details>
Expand Down Expand Up @@ -198,7 +197,7 @@ URL 里面也可放置占位符,请求时会进行简单的字符串替换。
<details>
<summary>如何使 OpenWrt/LEDE 自启动?来自 @艾斯德斯</summary>

首先在 release 下载对应的二进制解压tar.gz包后放置到 `/root`,然后 `chmod +x /root/nezha-agent` 赋予执行权限,然后创建 `/etc/init.d/nezha-service`
首先在 release 下载对应的二进制 tar.gz 包,解压后放置到 `/root`,然后 `chmod +x /root/nezha-agent` 赋予执行权限,然后创建 `/etc/init.d/nezha-service`

```
#!/bin/sh /etc/rc.common
Expand Down Expand Up @@ -251,6 +250,7 @@ restart() {
```nginx
server{
#原有的一些配置
#server_name blablabla...
location /ws {
Expand Down
24 changes: 16 additions & 8 deletions cmd/agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ var (

var (
client pb.NezhaServiceClient
ctx = context.Background()
updateCh = make(chan struct{}) // Agent 自动更新间隔
httpClient = &http.Client{
Transport: &http.Transport{
Expand All @@ -56,6 +55,7 @@ var (

const (
delayWhenError = time.Second * 10 // Agent 重连间隔
networkTimeOut = time.Second * 5 // 普通网络超时
)

func main() {
Expand Down Expand Up @@ -113,7 +113,7 @@ func run() {
}

for {
timeOutCtx, cancel := context.WithTimeout(ctx, time.Second*5)
timeOutCtx, cancel := context.WithTimeout(context.Background(), networkTimeOut)
conn, err = grpc.DialContext(timeOutCtx, server, grpc.WithInsecure(), grpc.WithPerRPCCredentials(&auth))
if err != nil {
println("grpc.Dial err: ", err)
Expand All @@ -124,19 +124,25 @@ func run() {
cancel()
client = pb.NewNezhaServiceClient(conn)
// 第一步注册
_, err = client.ReportSystemInfo(ctx, monitor.GetHost().PB())
timeOutCtx, cancel = context.WithTimeout(context.Background(), networkTimeOut)
_, err = client.ReportSystemInfo(timeOutCtx, monitor.GetHost().PB())
if err != nil {
println("client.ReportSystemInfo err: ", err)
cancel()
retry()
continue
}
cancel()
// 执行 Task
tasks, err := client.RequestTask(ctx, monitor.GetHost().PB())
timeOutCtx, cancel = context.WithTimeout(context.Background(), networkTimeOut)
tasks, err := client.RequestTask(timeOutCtx, monitor.GetHost().PB())
if err != nil {
println("client.RequestTask err: ", err)
cancel()
retry()
continue
}
cancel()
err = receiveTasks(tasks)
println("receiveTasks exit to main: ", err)
retry()
Expand Down Expand Up @@ -226,7 +232,7 @@ func doTask(task *pb.Task) {
if err != nil {
// 进程组创建失败,直接退出
result.Data = err.Error()
client.ReportTask(ctx, &result)
client.ReportTask(context.Background(), &result)
return
}
timeout := time.NewTimer(time.Hour * 2)
Expand Down Expand Up @@ -258,7 +264,7 @@ func doTask(task *pb.Task) {
default:
println("Unknown action: ", task)
}
client.ReportTask(ctx, &result)
client.ReportTask(context.Background(), &result)
}

func reportState() {
Expand All @@ -268,14 +274,16 @@ func reportState() {
for {
if client != nil {
monitor.TrackNetworkSpeed()
_, err = client.ReportSystemState(ctx, monitor.GetState(dao.ReportDelay).PB())
timeOutCtx, cancel := context.WithTimeout(context.Background(), networkTimeOut)
_, err = client.ReportSystemState(timeOutCtx, monitor.GetState(dao.ReportDelay).PB())
cancel()
if err != nil {
println("reportState error", err)
time.Sleep(delayWhenError)
}
if lastReportHostInfo.Before(time.Now().Add(-10 * time.Minute)) {
lastReportHostInfo = time.Now()
client.ReportSystemInfo(ctx, monitor.GetHost().PB())
client.ReportSystemInfo(context.Background(), monitor.GetHost().PB())
}
}
}
Expand Down

0 comments on commit 373b95a

Please sign in to comment.