# Qark 客户端
在使用原语云平台部署 Lotus 集群之前,你需要先找客服给你开通一个原语云账号,登录到原语云控制台,给你的节点全部安装上 qark-client
客户端 。
# 数据收集声明
为了给用户提供更高质量的服务,原语云需要收集服务所需的一些必要信息,并且仅在为用户提供服务时合理合法地使用这些数据。所有安装了我们 Qark 客户端(qark-client)的服务器,我们都会自动收集服务器硬件配置信息,以便给用户提供服务器在线管理服务。
qark-client 收集的数据结构如下:
{
"version": 1.22,
"node": {}, // 节点信息
"hd_bios": {}, // 主板 Bios 信息
"hd_sys": {}, // 主板厂商信息
"hd_board": {}, // 主板厂商扩展信息,如主板型号,序列号等
"system": {}, // 操作系统信息
"numa": [], // numa 信息
"cpu": {}, // cpu 信息
"gpu": [], // GPU 信息
"ram": {}, // 内存信息
"disk": [], // 磁盘信息
"vg": [], // 逻辑卷信息
"network": [], // 网络信息
"process": [] // 进程列表
}
查看一个完整的数据包
{
"version": 1.22,
"node": {
"no": 453,
"id": "60c821fb-4cb1fe00-20ac-4301-a0369fd88cb6",
"created_at": 1623728635,
"last_boot_at": 1647253626,
"interval": 30,
"last_stat_begin": 1647267506,
"last_stat_end": 1647267536,
"base_dir": "/etc/qark-client/",
"data_dir": "/var/run/qark-client/",
"log_dir": "/var/log/qark-client/"
},
"hd_bios": {
"vendor": "American Megatrends Inc.",
"version": "1.40",
"r_date": "10/29/2020",
"run_size": 65536,
"rom_size": 33554432
},
"hd_sys": {
"manufacturer": "Micro-Star International Co., Ltd.",
"pro_name": "MS-7C94",
"version": "1.0",
"serial_no": "to be filled by o.e.m.",
"uuid": "c7df20bd-4884-ad19-a13d-2cf05dcf9521"
},
"hd_board": {
"manufacturer": "Micro-Star International Co., Ltd.",
"pro_name": "MAG B550M MORTAR WIFI (",
"version": "1.0",
"serial_no": "07c9410_kb1e178215"
},
"system": {
"pre_def": "Linux",
"sys_name": "ubuntu",
"sys_info": "ubuntu 20.04.4 lts",
"node_name": "lionsoul-MS-7C94",
"release": "5.13.0-35-generic",
"version": "#40~20.04.1-Ubuntu SMP Mon Mar 7 09:18:32 UTC 2022",
"machine": "x86_64"
},
"numa": [
{
"cpu_list": "0-23",
"mem": {
"total": 131835424,
"free": 50718944,
"active": 26328984,
"inactive": 53602200,
"mlocked": 0,
"mapped": 763448,
"shmem": 16956
},
"stat": {
"numa_hit": 109065394,
"numa_miss": 0,
"numa_foreign": 0,
"interleave_hit": 2974,
"local_node": 109065394,
"other_node": 0
}
}
],
"cpu": {
"list": [
{
"vendor_id": "AuthenticAMD",
"model": 33,
"model_name": "AMD Ryzen 9 5900X 12-Core Processor",
"mhz": 2200,
"cache_size": 512,
"physical_id": 0,
"core_id": 0,
"cores": 12
}
],
"stat": {
"round_1": {
"name": "cpu",
"user": 67115,
"nice": 664712,
"system": 51405,
"idle": 32172851
},
"round_2": {
"name": "cpu",
"user": 67190,
"nice": 666192,
"system": 51460,
"idle": 32244295
},
"load_avg": 2.2038493156433105,
"sys_loadavg": {
"t_1m": 1.2799999713897705,
"t_5m": 2.1600000858306885,
"t_15m": 1.7599999904632568
}
}
},
"gpu": [
{
"desc": "VGA compatible controller",
"product": "TU116 [GeForce GTX 1660]",
"vendor": "NVIDIA Corporation",
"bus": "pci@0000:2b:00.0",
"config": "driver=nvidia latency=0",
"width": 64,
"clock": 33,
"query": true,
"driver_version": "470.103.01",
"cuda_version": "11.4",
"name": "NVIDIA GeForce GTX 1660",
"uuid": "GPU-48f27a85-5815-3708-ee8b-7789a6b7e13b",
"fan_speed": 54,
"mem_fb_total": 5922,
"mem_fb_used": 161,
"mem_fb_free": 5761,
"util_gpu": 0,
"util_mem": 0,
"util_dec": 0,
"util_enc": 0,
"temp_current": 52,
"temp_shutdown": 96,
"temp_slowdown": 93,
"temp_maxopt": 91,
"power_draw": 33.09000015258789,
"power_limit": 130,
"power_limit_min": 70,
"power_limit_max": 130,
"clock_graphics": 1530,
"clock_sm": 1530,
"clock_mem": 4001,
"clock_video": 1410,
"clock_graphics_max": 2145,
"clock_sm_max": 2145,
"clock_mem_max": 4001,
"clock_video_max": 1950,
"process": [
{
"id": 1710,
"mem": 86
},
{
"id": 1898,
"mem": 6
},
{
"id": 9516,
"mem": 63
}
]
}
],
"ram": {
"list": [
{
"total_width": 64,
"data_width": 64,
"size": 32768,
"speed": 2133,
"voltage": 1.2000000476837158,
"type": "DDR4",
"manufacturer": "Unknown",
"serial_no": "00000000"
},
{
"total_width": 64,
"data_width": 64,
"size": 32768,
"speed": 2133,
"voltage": 1.2000000476837158,
"type": "DDR4",
"manufacturer": "Unknown",
"serial_no": "00000000"
},
{
"total_width": 64,
"data_width": 64,
"size": 32768,
"speed": 2133,
"voltage": 1.2000000476837158,
"type": "DDR4",
"manufacturer": "Unknown",
"serial_no": "00000000"
},
{
"total_width": 64,
"data_width": 64,
"size": 32768,
"speed": 2133,
"voltage": 1.2000000476837158,
"type": "DDR4",
"manufacturer": "Unknown",
"serial_no": "00000000"
}
],
"stat": {
"total": 131835424,
"free": 50716672,
"available": 125704284,
"buffers": 89636,
"cached": 75809264,
"active": 26329052,
"inactive": 53602732,
"mlocked": 0,
"swapTotal": 2097148,
"swapFree": 2097148,
"mapped": 763924,
"shmem": 16956
}
},
"disk": [
{
"name": "/dev/nvme0n1",
"wwn": "0x0000000000000000",
"serial_no": "0x00000000",
"model": "Unknow",
"blocks": 3907029168,
"size": 2000398934016,
"stat": {
"num_r": 0,
"num_mr": 0,
"num_sr": 0,
"num_r_ms": 0,
"num_w": 0,
"num_mw": 0,
"num_sw": 0,
"num_w_ms": 0,
"num_io": 0,
"num_io_ms": 0,
"num_mio_ms": 0,
"num_io_util": "",
"num_rs_byte": 0,
"num_ws_byte": 0
},
"partitions": [
{
"no": 0,
"start": 0,
"blocks": 3907029168,
"size": 2000398934016,
"name": "/dev/nvme0n1",
"uuid": "996827a4-207c-4fc7-be99-22c2f1ae3d9c",
"format": "xfs",
"mount": "/data1",
"f_flag": 4096,
"f_bsize": 4096,
"f_blocks": 488140180,
"f_bfree": 484728541,
"f_bavail": 484728541,
"f_files": 195351424,
"f_ffree": 195351419,
"f_favail": 195351419,
"total": 1999422177280,
"free": 1985448103936,
"stat": {
"num_r": 0,
"num_mr": 0,
"num_sr": 0,
"num_r_ms": 0,
"num_w": 0,
"num_mw": 0,
"num_sw": 0,
"num_w_ms": 0,
"num_io": 0,
"num_io_ms": 0,
"num_mio_ms": 0
}
}
]
}
],
"vg": [],
"network": [
{
"name": "enp6s0f1",
"speed": 1000,
"hd_addr": "a0:36:9f:d8:8c:b7",
"in_addr": "192.168.2.3",
"round_1": {
"receive": 83360386211,
"r_packets": 80011515,
"transmit": 82106947592,
"t_packets": 58153982
},
"round_2": {
"receive": 83361407701,
"r_packets": 80012428,
"transmit": 82107013369,
"t_packets": 58154336
},
"incoming": 33423.94140625,
"outgoing": 2152.2744140625
}
],
"process": [
{
"user": "lionsoul",
"pid": 6578,
"pri": 14,
"psr": 15,
"cpu": 1.399999976158142,
"mem": 0.10000000149011612,
"vsz": 11389040,
"rss": 262692,
"gpu_id": 0,
"gpu_mem": 0,
"tty": "?",
"stat": "SNl",
"start": "18:41",
"time": "3:07",
"cmd": "./lotus daemon --lotus-make-genesis=devgen.car --genesis-template=localnet.json --bootstrap=false"
},
{
"user": "lionsoul",
"pid": 9516,
"pri": 14,
"psr": 5,
"cpu": 51.599998474121094,
"mem": 2.5999999046325684,
"vsz": 17082720,
"rss": 3435532,
"gpu_id": 0,
"gpu_mem": 64512,
"tty": "?",
"stat": "SNl",
"start": "18:59",
"time": "103:10",
"cmd": "./lotus-miner run --listen=192.168.2.3:2345 --storage-api=local --cluster-detect=true --winning-post=true --winning-worker=true --window-post=true --window-worker=true --batch-partitions=7 --sealing-mgr=true --sector-store=true --deals-mgr=false --nosync="
}
]
}
# 逐台安装
如果你机器比较少的话,建议一台一台通过 ssh 登录到机器安装 Qark SDK,具体步骤如下:
点击桌面的 下载中心, 右键 点此下载Linux 64Bit SDK安装程序(.tar.gz),选择复制链接地址,然后在需要安装 SDK 节点的机器打开终端,下载 SDK 到本地:
wget "<url>" -O qark.tar.gz
Note: 由于 url 中有 & 等特殊字符,所以记得 url 需要用引号引起来,否则会下载失败。
解压并安装:
tar -xzf qark.tar.gz
sudo ./install.sh
# 看到类似如下输出,则说明安装成功了
+-Try to stop the running qark-client with pid=750 ... --[Ok]
+-Try to copy the qark-client to /usr/local/bin ... --[Ok]
|--/etc/qark-client/config.json already exists !!!
+-Try to set the service manager ... --[Ok]
+-Try to set the auto boot start ... --[Ok]
+-Try to start the qark-client service ... --[Ok]
详细安装步骤请参考:客户端下载和安装 (opens new window)
错误处理:
我们云服务是按台付费的,如果发现节点没有注册成功的话,请打开
qark
的日志,/var/log/qark-client/qark.log
看下错误日志:
- errno = 3: 连不上原语云服务器,请检查您的机器网络连接是否良好,dns 服务是否正常工作,例如执行:
ping yycloud.pro
- errno = 25: 注册的机器数量超出限制,比如您这个账号只开通了 5 台服务管理限额,如果试图安装第 6 台则会报错:
Failed to register the node with errno=25, res_buffer={"errno":21,"data":"failed to increase the cur_node_num, the number of your registered servers exceeds the maximum limit 1"}
- errno = 111: 原语云账号被锁定了,此时你应该联系原语云客服解锁账户,解锁之后你需要重启服务
- errno = 112: 原语云账号过期超过 7 天了,SDK 进程会自动退出,这时你需要续费后再手动启动服务:
service qark-client start
- errno = 3: 连不上原语云服务器,请检查您的机器网络连接是否良好,dns 服务是否正常工作,
# 批量安装
如果你的机器数量比较多,比如你有 50 台机器,那么你逐台安装的话,可能会比较费时,这里我们提供了批量安装的方法,具体操作步骤如下:
登录原语云控制台 -> 点击
SDK 下载
-> 右键点击点此下载Linux 64Bit SDK安装程序(.tar.gz) >>
-> 复制链接地址。下载批量安装脚本 install_qark.sh,修改 qark SDK 的下载路径,每个账户的下载路径是不一样的。
url="<sdk_download_url>" # url 改成自己的下载地址
把
<sdk_download_url>
修改成第 (1) 步复制的地址。给install_qark.sh
添加可执行权限:chmod +x install_qark.sh
注意
远程下载比较慢,如果你的机器比较多的话,我们推荐的方式是,你先把你自己的 SDK 下载到 Miner 机器的
/var/www/html
,然后把下载地址改成内部局域网的地址,供其他机器下载。这里你需要在你的 Miner 上先装上 qark-client,它需要先启动 qark-client 内置的 http 服务。
url="http://<ip>:17181/qark.tar.gz" # <ip> 需要替换成你 Miner 的 IP,注意必须是能跟 Worker 通信的 IP
下载配置文件 machine.conf,把你的机器的 ssh 连接信息按照格式填入:
- user: ssh 用户名
- password: ssh 密码
- port: ssh 端口
- hostname: 主机名称,也是安装完成之后 qark 显示的节点名称。安装过程中会自动更改节点的 hostname 以及
/etc/hostname
文件。
# usage 192.168.1.111 <user> <password> <port> <hostname> # e.g 192.168.1.112 user1 123456 22 lotus-worker1
machine.conf 要跟 install_qark.sh 在同一个目录。
执行批量安装脚本
./install_qark.sh
接下来脚本会自动去批量安装 qark-sdk 到节点。安装完成之后,会在当前目录下生成一个
log
文件夹,打开相应的文件夹检测安装结果,如果有安装失败的,找到原因(一般是 ssh 连接失败)之后重新安装。日志示例:
2020-09-22 14:41:18 Install qark successfully, worker135@192.168.10.180:22
2020-09-22 14:41:28 Install qark successfully, worker136@192.168.10.181:22
2020-09-22 14:41:38 Install qark successfully, worker137@192.168.10.182:22
2020-09-22 14:41:49 Install qark failed, worker138@192.168.10.183:22
2020-09-22 14:41:59 Install qark successfully, worker139@192.168.10.184:22
# 重启 qark-client 服务
# 1. 停止 qark-client 服务
service qark-client stop
# 2. 启动 qark-client 服务
service qark-client start
# 卸载 qark-client 服务
新建一个卸载脚本
uninstall.sh
脚本粘贴以下代码到
uninstall.sh
#!/bin/sh
### qark-client uninstall script
#
# Stops a running qark-client service (if any), then removes the installed
# binary, the init.d service script and the rc2.d boot-start link.
# Every privileged step is run through sudo.

bin_file=qark-client
service_file=qark-client
install_dir=/usr/local/bin

# Print the PID(s) of every process whose `ps -ef` line contains
# "qark-client", joined by single spaces; prints an empty line when
# nothing matches.
get_pid() {
    echo $(ps -ef | grep qark-client | grep -v grep | awk '{print $2}')
}

# check and stop the running service first
pid=$(get_pid)
# Only test for a non-empty result: the value may hold several PIDs, and an
# unquoted numeric comparison on it would fail with "too many arguments",
# silently skipping the stop step.
if [ -n "$pid" ]; then
    echo -n "+-Try to stop the running qark-client with pid=$pid ... "
    sudo service qark-client stop
    echo " --[Ok]"
fi

echo -n "+-Try to remove the $install_dir/$bin_file ... "
sudo rm -f "$install_dir/$bin_file"
echo " --[Ok]"

echo -n "+-Try to remove the service manager script ... "
# The init script is deleted before update-rc.d runs, keeping the original
# order of operations.
sudo rm -f "/etc/init.d/$service_file"
sudo update-rc.d "$service_file" remove
echo " --[Ok]"

echo -n "+-Try to remove the boot auto start script ... "
sudo rm -f /etc/rc2.d/S01qark-client
echo " --[Ok]"
执行卸载脚本
chmod +x ./uninstall.sh
./uninstall.sh
# 设置服务密码
为了防止运维人员误操作,在推送任务的时候需要输入服务密码。所以建议在第一次登录控制台的时候先设置好服务密码。
点击左下角的
开始
图标 ->账户设置
输入登录密码,设置服务密码
# 转移镜像服务
原语云在全球提供了多个镜像服务,旨在为客户提供高可用的云服务。目前原语云在全球可用的镜像有:
为积极响应国家政策法规,中国大陆镜像将于 2021年10月16日关闭服务,届时客户可以将服务转移至原语云的中国香港镜像,具体转移步骤如下:
提示
整个转移过程不会影响你当前集群的运行,你不需要对集群做任何操作!!!
双击控制台桌面上的
任务面板
->新建任务
,填入一个任务名称和说明如 "转移服务镜像",如果你还不知道如何新建任务,请参考 新建任务 教程。点击当前任务的
更多操作
下拉菜单,选择代码
在代码编辑界面,选择
Ark
脚本引擎,然后在任务脚本输入框中将下面的代码粘贴进去,保存脚本:
local mirrors_src = "yyyun.pro"
local mirrors_dst = "yycloud.pro"
local config_file = "/etc/qark-client/config.json"
if (file_exists(config_file) == false) then
    print("Error: Unable to find file ", config_file, "\n")
    return false
end

-- get the content of the config file
-- and replace the app_key line
local fd = fopen(config_file, "r")
if (fd == nil) then
    print("Error: Failed to open the config file ", config_file, "\n")
    return false
end

local buff = {}
local mnum = 0
local line = fgets(fd, 1024)
print("+-Try to replace the mirrors config ... ")
while (line ~= nil) do
    if (strstr(line, mirrors_src) ~= nil) then
        mnum = mnum + 1
        table.insert(buff, str_replace(line, mirrors_src, mirrors_dst))
    else
        table.insert(buff, line)
    end
    line = fgets(fd, 1024)
end
fclose(fd)
print(" --[Done] with ", mnum, " rows\n")

-- write the content to the config file
local str = table.concat(buff)
print("+-Try to update the config file ... ")
if (file_put_contents(config_file, str) == false) then
    print(" --[Failed]\n")
    return false
else
    print(" --[Ok]\n")
end

-- get the qark-client pid
local pid = tonumber(trim(file_get_contents("/var/run/qark-client/qark.pid")))
if (pid ~= nil and pid > 0) then
    -- restart the qark-client
    print("+-Try to restart qark-client ... ")
    local r = kill(pid, SIGUSR1)
    if (r == true) then
        print(" --[Ok]\n")
    else
        print(" --[Failed]\n")
        return false
    end
else
    print("+-Try to start qark-client ... ")
    r = os.execute("/usr/local/bin/qark-client -d /etc/qark-client/ > /dev/null &")
    if (r == true) then
        print(" --[Ok]\n")
    else
        print(" --[Failed]\n")
        return false
    end
end

return true
上面有两个变量需要注意下,其中
mirrors_src
表示原镜像域名,mirrors_dst
表示目的镜像域名,这里分别是yyyun.pro
(大陆) 和yycloud.pro
(香港)。点击
更多操作
下拉菜单,选择运行
,然后再选择节点分发方式为全部非故障节点
,再点击确定
按钮,最后输入服务密码之后即可执行。脚本执行成功之后,右上角会弹出提示框,这时你会发现旧镜像的节点全部离线了,然后登录新的镜像服务器:https://yycloud.pro (opens new window)。如果发现节点在线,则说明当前节点转移服务成功,否则说明转移失败,请联系原语云客服。