赞
踩
最近我们负责的一个电网行业的业务平台,出现了一个非常奇葩的问题。这个项目涉及软件平台厂家、服务器厂家、存储厂家以及最终用户等四方,而我们主要负责软件平台的研发工作。平台已经正常运行了2年多,去年年底存储盘阵中却出现了历史巡检数据丢失或者无法使用的诡异问题。软件、服务器以及存储各方分别根据运行日志进行了审计,都说自己没有问题;接着又请了比较权威的第三方测评单位,召开了5次讨论会,仍然没有分析出真正的原因,只给出了一个***看似非常弱智的原因***,最后只能几家摊钱恢复数据(因为每家只能证明自身没有问题,却不能证明问题究竟出在哪个环节,所以只能摊钱)。
俗话说:“吃一堑,长一智”。业务平台作为应用层,是最接近用户的。平台一出问题,给用户的第一印象就是这个平台太烂了,又出问题了。因此,今后再出现问题时,软件平台需要能够对问题进行实时追踪,并根据历史运行记录进行审计,以便更快速地缩小问题范围、更精确地定位问题。
我们计划从数据监控和服务监控两个大维度进行实时监控;从巡检数据的上传、下载以及变更等操作详细记录用户的操作痕迹。
OSHI 是一个免费的、基于 JNA 的 Java(原生)操作系统与硬件信息库。它不需要安装任何额外的本地库,旨在提供一种跨平台的实现来获取系统信息,例如操作系统版本、进程、内存和 CPU 使用率、磁盘和分区、设备、传感器等。
我们的业务平台采用 Spring Boot 开发,因此主要通过 Spring Boot 集成 OSHI 来获取需要实时关注的参数信息。
<dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-web</artifactId> </dependency> <!-- 获取系统信息 --> <dependency> <groupId>com.github.oshi</groupId> <artifactId>oshi-core</artifactId> <version>3.9.1</version> </dependency> <!--常用工具类 --> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-lang3</artifactId> </dependency> <!-- 阿里JSON解析器 --> <dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.47</version> </dependency> <dependency> <groupId>org.projectlombok</groupId> <artifactId>lombok</artifactId> <optional>true</optional> </dependency>
我们主要关注主服务器的CPU、内存,以及所挂载的存储盘阵的空间容量使用情况等信息,具体模型如下:
/**
 * CPU load figures for one sampling interval.
 *
 * <p>Setters store raw values; getters return derived values. In this file
 * {@code Server.setCpuInfo} fills the fields with tick deltas, so
 * {@link #getSys()}, {@link #getUsed()}, {@link #getWait()} and
 * {@link #getFree()} report each part as a percentage of {@code total}.
 */
public class Cpu {
    /** Number of cores. */
    private int cpuNum;

    /** Sample total; the other figures are reported as shares of this value. */
    private double total;

    /** System-mode portion of the sample. */
    private double sys;

    /** User-mode portion of the sample. */
    private double used;

    /** I/O-wait portion of the sample. */
    private double wait;

    /** Idle portion of the sample. */
    private double free;

    public int getCpuNum() {
        return cpuNum;
    }

    public void setCpuNum(int cpuNum) {
        this.cpuNum = cpuNum;
    }

    /**
     * Returns the stored total multiplied by 100 and passed through
     * {@code Arith.round(..., 2)}.
     *
     * <p>NOTE(review): unlike the other getters this is not a share of anything;
     * it is the raw total scaled by 100. Kept as-is because consumers of the
     * serialised form may depend on it - confirm before changing.
     */
    public double getTotal() {
        return Arith.round(Arith.mul(total, 100), 2);
    }

    public void setTotal(double total) {
        this.total = total;
    }

    /** @return system-mode share of {@code total}, in percent */
    public double getSys() {
        return percentOfTotal(sys);
    }

    public void setSys(double sys) {
        this.sys = sys;
    }

    /** @return user-mode share of {@code total}, in percent */
    public double getUsed() {
        return percentOfTotal(used);
    }

    public void setUsed(double used) {
        this.used = used;
    }

    /** @return I/O-wait share of {@code total}, in percent */
    public double getWait() {
        return percentOfTotal(wait);
    }

    public void setWait(double wait) {
        this.wait = wait;
    }

    /** @return idle share of {@code total}, in percent */
    public double getFree() {
        return percentOfTotal(free);
    }

    public void setFree(double free) {
        this.free = free;
    }

    /**
     * Expresses {@code part} as a percentage of {@code total}.
     *
     * @param part raw value to convert
     * @return {@code part / total * 100} passed through {@code Arith.round(..., 2)},
     *         or 0 when {@code total} is 0
     */
    private double percentOfTotal(double part) {
        // A fresh instance (or a sample with no elapsed ticks) has total == 0;
        // dividing would produce NaN or Infinity, so report 0 instead.
        if (total == 0) {
            return 0;
        }
        return Arith.round(Arith.mul(part / total, 100), 2);
    }

    @Override
    public String toString() {
        return "Cpu{" + "cpuNum=" + getCpuNum() + ", total=" + getTotal() + ", sys=" + getSys()
                + ", used=" + getUsed() + ", wait=" + getWait() + ", free=" + getFree() + '}';
    }
}
/**
 * Memory figures for the host.
 *
 * <p>Setters store the raw values; getters divide by 1024^3 (presumably
 * bytes to GiB - confirm against the caller) and keep two decimal places.
 */
public class Mem {
    /** Divisor applied by the getters: 1024 * 1024 * 1024. */
    private static final int UNIT_DIVISOR = 1024 * 1024 * 1024;

    /** Total memory, raw value as supplied to the setter. */
    private double total;

    /** Used memory, raw value as supplied to the setter. */
    private double used;

    /** Free memory, raw value as supplied to the setter. */
    private double free;

    /** @return total memory divided by 1024^3, scale 2 */
    public double getTotal() {
        return Arith.div(total, UNIT_DIVISOR, 2);
    }

    public void setTotal(long total) {
        this.total = total;
    }

    /** @return used memory divided by 1024^3, scale 2 */
    public double getUsed() {
        return Arith.div(used, UNIT_DIVISOR, 2);
    }

    public void setUsed(long used) {
        this.used = used;
    }

    /** @return free memory divided by 1024^3, scale 2 */
    public double getFree() {
        return Arith.div(free, UNIT_DIVISOR, 2);
    }

    public void setFree(long free) {
        this.free = free;
    }

    /**
     * Returns used memory as a percentage of total memory.
     *
     * @return {@code used / total} at scale 4, multiplied by 100; 0 when
     *         {@code total} is 0
     */
    public double getUsage() {
        // total stays 0 until setTotal is called; do not hand a zero divisor
        // to Arith.div in that state.
        if (total == 0) {
            return 0;
        }
        return Arith.mul(Arith.div(used, total, 4), 100);
    }

    @Override
    public String toString() {
        return "Mem{" + "total=" + getTotal() + ", used=" + getUsed() + ", free=" + getFree() + '}';
    }
}
/**
 * Usage figures for one file store (drive / mount point).
 *
 * <p>Explicit accessors are declared so the class works without annotation
 * processing: {@code Server.setSysFiles} calls the setters, and getter-based
 * serialisers need the getters.
 */
public class SysFile {
    /** Drive (mount) path. */
    private String dirName;

    /** Drive type, e.g. {@code NTFS} in the sample output. */
    private String sysTypeName;

    /** File type / display name of the store. */
    private String typeName;

    /** Total size, already formatted for display. */
    private String total;

    /** Free size, already formatted for display. */
    private String free;

    /** Used size, already formatted for display. */
    private String used;

    /** Usage of the resource as a percentage. */
    private double usage;

    public String getDirName() {
        return dirName;
    }

    public void setDirName(String dirName) {
        this.dirName = dirName;
    }

    public String getSysTypeName() {
        return sysTypeName;
    }

    public void setSysTypeName(String sysTypeName) {
        this.sysTypeName = sysTypeName;
    }

    public String getTypeName() {
        return typeName;
    }

    public void setTypeName(String typeName) {
        this.typeName = typeName;
    }

    public String getTotal() {
        return total;
    }

    public void setTotal(String total) {
        this.total = total;
    }

    public String getFree() {
        return free;
    }

    public void setFree(String free) {
        this.free = free;
    }

    public String getUsed() {
        return used;
    }

    public void setUsed(String used) {
        this.used = used;
    }

    public double getUsage() {
        return usage;
    }

    public void setUsage(double usage) {
        this.usage = usage;
    }

    @Override
    public String toString() {
        return "SysFile{" + "dirName='" + dirName + '\'' + ", sysTypeName='" + sysTypeName + '\''
                + ", typeName='" + typeName + '\'' + ", total='" + total + '\''
                + ", free='" + free + '\'' + ", used='" + used + '\''
                + ", usage=" + usage + '}';
    }
}
/**
 * Snapshot of the host: CPU, memory, basic system properties and file stores.
 *
 * <p>A new instance holds empty sub-objects; {@link #copyTo()} fills them from
 * OSHI and from JVM system properties.
 */
public class Server {
    /**
     * Pause between the two CPU tick samples, passed to {@code Util.sleep}.
     * NOTE(review): the name says "second" but the value 1000 is presumably
     * milliseconds - confirm against the OSHI {@code Util.sleep} contract.
     */
    private static final int OSHI_WAIT_SECOND = 1000;

    /** CPU information. */
    private Cpu cpu = new Cpu();

    /** Memory information. */
    private Mem mem = new Mem();

    /** Server (host) information. */
    private Sys sys = new Sys();

    /** File store (disk) information. */
    private List<SysFile> sysFiles = new LinkedList<>();

    public Cpu getCpu() {
        return cpu;
    }

    public void setCpu(Cpu cpu) {
        this.cpu = cpu;
    }

    public Mem getMem() {
        return mem;
    }

    public void setMem(Mem mem) {
        this.mem = mem;
    }

    public Sys getSys() {
        return sys;
    }

    public void setSys(Sys sys) {
        this.sys = sys;
    }

    public List<SysFile> getSysFiles() {
        return sysFiles;
    }

    public void setSysFiles(List<SysFile> sysFiles) {
        this.sysFiles = sysFiles;
    }

    /**
     * Collects CPU, memory, system and file-store information into this object.
     *
     * <p>Blocks for the CPU sampling pause (see {@code OSHI_WAIT_SECOND}).
     * File stores are appended to the current list on every call.
     *
     * @throws Exception declared for callers; the visible body throws no checked exception
     */
    public void copyTo() throws Exception {
        SystemInfo si = new SystemInfo();
        HardwareAbstractionLayer hal = si.getHardware();
        setCpuInfo(hal.getProcessor());
        setMemInfo(hal.getMemory());
        setSysInfo();
        setSysFiles(si.getOperatingSystem());
    }

    /**
     * Samples the system CPU ticks twice and stores the per-type deltas.
     *
     * @param processor OSHI processor to sample
     */
    private void setCpuInfo(CentralProcessor processor) {
        long[] prevTicks = processor.getSystemCpuLoadTicks();
        Util.sleep(OSHI_WAIT_SECOND);
        long[] ticks = processor.getSystemCpuLoadTicks();

        long nice = tickDelta(ticks, prevTicks, TickType.NICE);
        long irq = tickDelta(ticks, prevTicks, TickType.IRQ);
        long softirq = tickDelta(ticks, prevTicks, TickType.SOFTIRQ);
        long steal = tickDelta(ticks, prevTicks, TickType.STEAL);
        long cSys = tickDelta(ticks, prevTicks, TickType.SYSTEM);
        long user = tickDelta(ticks, prevTicks, TickType.USER);
        long iowait = tickDelta(ticks, prevTicks, TickType.IOWAIT);
        long idle = tickDelta(ticks, prevTicks, TickType.IDLE);
        long totalCpu = user + nice + cSys + idle + iowait + irq + softirq + steal;

        cpu.setCpuNum(processor.getLogicalProcessorCount());
        cpu.setTotal(totalCpu);
        cpu.setSys(cSys);
        cpu.setUsed(user);
        cpu.setWait(iowait);
        cpu.setFree(idle);
    }

    /** Returns how far the counter for {@code type} advanced between two samples. */
    private static long tickDelta(long[] current, long[] previous, TickType type) {
        int index = type.getIndex();
        return current[index] - previous[index];
    }

    /**
     * Stores total, used (total minus available) and available memory.
     *
     * @param memory OSHI memory view
     */
    private void setMemInfo(GlobalMemory memory) {
        mem.setTotal(memory.getTotal());
        mem.setUsed(memory.getTotal() - memory.getAvailable());
        mem.setFree(memory.getAvailable());
    }

    /** Stores host name, host IP and the os.name / os.arch / user.dir system properties. */
    private void setSysInfo() {
        Properties props = System.getProperties();
        sys.setComputerName(IpUtils.getHostName());
        sys.setComputerIp(IpUtils.getHostIp());
        sys.setOsName(props.getProperty("os.name"));
        sys.setOsArch(props.getProperty("os.arch"));
        sys.setUserDir(props.getProperty("user.dir"));
    }

    /**
     * Appends one {@link SysFile} per file store reported by the operating system.
     *
     * @param os OSHI operating-system view
     */
    private void setSysFiles(OperatingSystem os) {
        FileSystem fileSystem = os.getFileSystem();
        OSFileStore[] fsArray = fileSystem.getFileStores();
        for (OSFileStore fs : fsArray) {
            long free = fs.getUsableSpace();
            long total = fs.getTotalSpace();
            long used = total - free;

            // A store that reports a total of 0 has no meaningful usage ratio;
            // report 0 instead of passing a zero divisor to Arith.div.
            double usage = total == 0 ? 0 : Arith.mul(Arith.div(used, total, 4), 100);

            SysFile sysFile = new SysFile();
            sysFile.setDirName(fs.getMount());
            sysFile.setSysTypeName(fs.getType());
            sysFile.setTypeName(fs.getName());
            sysFile.setTotal(convertFileSize(total));
            sysFile.setFree(convertFileSize(free));
            sysFile.setUsed(convertFileSize(used));
            sysFile.setUsage(usage);
            // NOTE(review): diagnostic output kept for behavioural compatibility;
            // consider routing it through the application's logger.
            System.out.println(sysFile);
            sysFiles.add(sysFile);
        }
    }

    /**
     * Formats a byte count using the largest fitting unit (1024-based).
     *
     * <p>GB values always carry one decimal; MB and KB values carry one decimal
     * up to 100 and none above; anything below 1 KB is printed as whole bytes.
     *
     * @param size size in bytes
     * @return formatted value, e.g. {@code "1.5 GB"}, {@code "200 MB"}, {@code "12 B"}
     */
    public String convertFileSize(long size) {
        long kb = 1024;
        long mb = kb * 1024;
        long gb = mb * 1024;
        if (size >= gb) {
            return String.format("%.1f GB", (float) size / gb);
        } else if (size >= mb) {
            float f = (float) size / mb;
            return String.format(f > 100 ? "%.0f MB" : "%.1f MB", f);
        } else if (size >= kb) {
            float f = (float) size / kb;
            return String.format(f > 100 ? "%.0f KB" : "%.1f KB", f);
        } else {
            return String.format("%d B", size);
        }
    }
}
/**
 * Server monitoring endpoint.
 */
@RestController
@RequestMapping(value = "/monitor/server")
public class ServerController {

    /**
     * Handles {@code GET /monitor/server/list}: collects a fresh {@link Server}
     * snapshot and returns it serialised as a JSON string.
     *
     * <p>The call blocks while {@code Server.copyTo()} samples the CPU.
     *
     * @return JSON text of the collected server information
     * @throws Exception propagated from {@code Server.copyTo()}
     */
    @GetMapping(value = "list")
    public String server() throws Exception {
        Server server = new Server();
        server.copyTo();
        return JSON.toJSONString(server);
    }
}
2020-03-13 16:27:22.454 DEBUG 11272 --- [nio-8088-exec-1] o.s.web.servlet.DispatcherServlet : enableLoggingRequestDetails='false': request parameters and headers will be masked to prevent unsafe logging of potentially sensitive data
2020-03-13 16:27:22.454 INFO 11272 --- [nio-8088-exec-1] o.s.web.servlet.DispatcherServlet : Completed initialization in 5 ms
2020-03-13 16:27:22.463 DEBUG 11272 --- [nio-8088-exec-1] o.s.web.servlet.DispatcherServlet : GET "/monitor/server/list", parameters={}
2020-03-13 16:27:22.466 DEBUG 11272 --- [nio-8088-exec-1] s.w.s.m.m.a.RequestMappingHandlerMapping : Mapped to com.server.htzw.monitor.controller.ServerController#server()
SysFile{dirName='F:\', sysTypeName='NTFS', typeName='本地固定磁盘 (F:)', total='700.0 GB', free='607.6 GB', used='92.4 GB', usage=13.2}
SysFile{dirName='C:\', sysTypeName='NTFS', typeName='本地固定磁盘 (C:)', total='226.7 GB', free='162.2 GB', used='64.5 GB', usage=28.46}
SysFile{dirName='E:\', sysTypeName='NTFS', typeName='本地固定磁盘 (E:)', total='237.1 GB', free='141.5 GB', used='95.6 GB', usage=40.33}
SysFile{dirName='H:\', sysTypeName='NTFS', typeName='本地固定磁盘 (H:)', total='1863.0 GB', free='218.0 GB', used='1645.0 GB', usage=88.3}
SysFile{dirName='D:\', sysTypeName='NTFS', typeName='本地固定磁盘 (D:)', total='100.0 GB', free='98.7 GB', used='1.3 GB', usage=1.3}
SysFile{dirName='G:\', sysTypeName='NTFS', typeName='本地固定磁盘 (G:)', total='1063.0 GB', free='1057.8 GB', used='5.2 GB', usage=0.49}
Memory=:Mem{total=63.89, used=16.02, free=47.87}
CPU=:Cpu{cpuNum=8, total=812500.0, sys=2.28, used=1.54, wait=0.0, free=96.16}
sysInfo=:Sys{computerName='AFODY-003091210', computerIp='10.0.4.55', userDir='F:\monitor', osName='Windows 10', osArch='amd64'}
{"cpu":{"cpuNum":8,"free":64.26,"sys":10.89,"total":800000.0,"used":24.8,"wait":0.0},"mem":{"free":47.59,"total":63.89,"usage":25.5,"used":16.29},"sys":{"computerIp":"10.0.4.55","computerName":"AFODY-003091210","osArch":"amd64","osName":"Windows 10","userDir":"F:\\monitor"},"sysFiles":[{"dirName":"F:\\","free":"607.6 GB","sysTypeName":"NTFS","total":"700.0 GB","typeName":"本地磁盘 (F:\\)","usage":13.2,"used":"92.4 GB"},{"dirName":"C:\\","free":"162.1 GB","sysTypeName":"NTFS","total":"226.7 GB","typeName":"Windows10 (C:\\)","usage":28.5,"used":"64.6 GB"},{"dirName":"E:\\","free":"141.5 GB","sysTypeName":"NTFS","total":"237.1 GB","typeName":"本地磁盘 (E:\\)","usage":40.33,"used":"95.6 GB"},{"dirName":"H:\\","free":"218.0 GB","sysTypeName":"NTFS","total":"1863.0 GB","typeName":"Seagate Backup Plus Drive (H:\\)","usage":88.3,"used":"1645.0 GB"},{"dirName":"D:\\","free":"98.7 GB","sysTypeName":"NTFS","total":"100.0 GB","typeName":" (D:\\)","usage":1.3,"used":"1.3 GB"},{"dirName":"G:\\","free":"1057.8 GB","sysTypeName":"NTFS","total":"1063.0 GB","typeName":" (G:\\)","usage":0.49,"used":"5.2 GB"}]}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。