001/* 002 * Stallion Core: A Modern Web Framework 003 * 004 * Copyright (C) 2015 - 2016 Stallion Software LLC. 005 * 006 * This program is free software: you can redistribute it and/or modify it under the terms of the 007 * GNU General Public License as published by the Free Software Foundation, either version 2 of 008 * the License, or (at your option) any later version. This program is distributed in the hope that 009 * it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 010 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 011 * License for more details. You should have received a copy of the GNU General Public License 012 * along with this program. If not, see <http://www.gnu.org/licenses/gpl-2.0.html>. 013 * 014 * 015 * 016 */ 017 018package io.stallion.monitoring; 019 020import io.stallion.asyncTasks.TaskHealthInfo; 021import io.stallion.jobs.JobHealthInfo; 022import io.stallion.settings.Settings; 023 024import java.io.File; 025import java.util.ArrayList; 026import java.util.List; 027 028import static io.stallion.utils.Literals.*; 029import java.lang.management.ManagementFactory; 030import java.lang.management.OperatingSystemMXBean; 031import com.sun.management.UnixOperatingSystemMXBean; 032 033public class HealthInfo { 034 private HttpHealthInfo http = null; 035 private List<JobHealthInfo> jobs = list(); 036 private TaskHealthInfo tasks = null; 037 private List<EndpointHealthInfo> endpoints = null; 038 private List<String> errors = new ArrayList<>(); 039 private List<String> warnings = list(); 040 private SystemHealth system = null; 041 private int httpStatusCode = 200; 042 043 private static final long MIN_DISK_SPACE = 1024 * 1024 * 1024; // 1 GB Min disk space 044 private static final long MAX_MEM_USAGE = 500 * 1024 * 1024; // 500MB max memory 045 private static final long MAX_FILE_HANDLES = 4000; 046 047 public HealthInfo hydrateSystemHealth() { 048 Runtime rt = Runtime.getRuntime(); 049 long memUsage = (rt.totalMemory() - rt.freeMemory()); 050 long usedMB = memUsage / 1024 / 1024; 051 system.setJvmMemoryUsage(memUsage); 052 system.setJvmMemoryUsageMb(usedMB); 053 system.setDiskFreeAppDirectory(new File(Settings.instance().getTargetFolder()).getUsableSpace()); 054 system.setDiskFreeDataDirectory(new File(Settings.instance().getDataDirectory()).getUsableSpace()); 055 system.setDiskFreeDataDirectoryMb(new File(Settings.instance().getDataDirectory()).getUsableSpace() / 1024 / 1024); 056 system.setDiskFreeLogDirectory(new File("/tmp").getUsableSpace()); 057 OperatingSystemMXBean os = ManagementFactory.getOperatingSystemMXBean(); 058 if(os instanceof UnixOperatingSystemMXBean){ 059 UnixOperatingSystemMXBean unixBean = (UnixOperatingSystemMXBean) os; 060 //unixBean. 061 // get system load 062 // get process CPU load 063 064 system.setFileHandlesOpen(unixBean.getOpenFileDescriptorCount()); 065 } 066 return this; 067 } 068 069 public HealthInfo hydrateErrors() { 070 if (http != null) { 071 if (http.getError500s() > 0 && ((http.getError500s() / http.getRequestCount()) > .002)){ 072 warnings.add("Had " + http.getError500s() + " 500 errors"); 073 } 074 if (http.getError500s() > 0 && ((http.getError500s() / http.getRequestCount()) > .05)){ 075 errors.add("Had " + http.getError500s() + " 500 errors"); 076 } 077 078 if (http.getError400s() > 10 && http.getRequestCount() > 100 && 079 ((http.getError400s() / http.getRequestCount()) > .1)) { 080 warnings.add("Too many 400 errors: " + http.getError400s() + " errors out of " + http.getRequestCount() + " requests"); 081 } 082 } 083 if (jobs != null) { 084 for (JobHealthInfo info : getJobs()) { 085 if (info.getExpectCompleteBy() < mils()) { 086 warnings.add("Job " + info.getJobName() + " has not completed in the expected time. "); 087 } 088 if (!empty(info.getError())) { 089 warnings.add("Job " + info.getJobName() + " finished last run with errors."); 090 } 091 // Job has been overdue for half-a-day, give an error 092 if (info.getExpectCompleteBy() < (mils() - (42000 * 1000))) { 093 errors.add("Job " + info.getJobName() + " has been overdue for more than a day and a half. "); 094 } 095 if (info.getFailCount() > 2) { 096 warnings.add("Job " + info.getJobName() + " has failed more than three times in a row."); 097 } 098 099 } 100 } 101 if (tasks != null) { 102 if (tasks.getStuckTasks() > 0) { 103 warnings.add("Stuck async tasks were found."); 104 } 105 } 106 if (endpoints != null) { 107 for (EndpointHealthInfo info : endpoints) { 108 if (info.getStatusCode() >= 400) { 109 errors.add("Endpoint " + info.getUrl() + " had a bad status: " + info.getStatusCode()); 110 } 111 if (!info.isFoundString()) { 112 errors.add("Check string not found for endpoint " + info.getUrl()); 113 } 114 } 115 } 116 if (system != null) { 117 if (system.getDiskFreeAppDirectory() < MIN_DISK_SPACE) { 118 errors.add("App directory is below the minimum disk space. "); 119 } 120 if (system.getDiskFreeDataDirectory() < MIN_DISK_SPACE) { 121 errors.add("Data directory is below the minimum disk space. "); 122 } 123 124 if (system.getDiskFreeLogDirectory() < MIN_DISK_SPACE) { 125 errors.add("Log directory is below the minimum disk space. "); 126 } 127 if (system.getJvmMemoryUsage() > MAX_MEM_USAGE) { 128 warnings.add("JVM is using more than allowed memory."); 129 } 130 long maxFileHandles = MAX_FILE_HANDLES; 131 if (system.getFileHandlesMax() > 0) { 132 maxFileHandles = system.getFileHandlesMax(); 133 } 134 if ((new Double(system.getFileHandlesOpen()) / new Double(maxFileHandles)) > .8 || system.getFileHandlesAvailable() < 750) { 135 errors.add("Running out of available file handles open on your system!"); 136 } 137 138 if (system.getMemoryPercentFree() < .2) { 139 warnings.add("Using more than 80% of total memory, physical and swap."); 140 } 141 if (system.getSwapPagingRate() > 25) { 142 warnings.add("Swapping rate is over 25 pages."); 143 } 144 145 if (system.isSslExpiresWithin21Days()) { 146 warnings.add("SSL certificate expires within 21 days. Make sure to update it!"); 147 } 148 if (system.isSslExpiresWithin7Days()) { 149 errors.add("SSL certificate expires within 7 days. Make sure to update it!"); 150 } 151 } 152 return this; 153 } 154 155 156 public HttpHealthInfo getHttp() { 157 return http; 158 } 159 160 public void setHttp(HttpHealthInfo http) { 161 this.http = http; 162 } 163 164 165 166 public TaskHealthInfo getTasks() { 167 return tasks; 168 } 169 170 public void setTasks(TaskHealthInfo tasks) { 171 this.tasks = tasks; 172 } 173 174 public List<JobHealthInfo> getJobs() { 175 return jobs; 176 } 177 178 public void setJobs(List<JobHealthInfo> jobs) { 179 this.jobs = jobs; 180 } 181 182 public List<EndpointHealthInfo> getEndpoints() { 183 return endpoints; 184 } 185 186 public void setEndpoints(List<EndpointHealthInfo> endpoints) { 187 this.endpoints = endpoints; 188 } 189 190 public List<String> getErrors() { 191 return errors; 192 } 193 194 public void setErrors(List<String> errors) { 195 this.errors = errors; 196 } 197 198 public List<String> getWarnings() { 199 return warnings; 200 } 201 202 public void setWarnings(List<String> warnings) { 203 this.warnings = warnings; 204 } 205 206 public SystemHealth getSystem() { 207 return system; 208 } 209 210 public void setSystem(SystemHealth system) { 211 this.system = system; 212 } 213 214 public int getHttpStatusCode() { 215 return httpStatusCode; 216 } 217 218 public void setHttpStatusCode(int httpStatusCode) { 219 this.httpStatusCode = httpStatusCode; 220 } 221}