001/* 002 * Stallion Core: A Modern Web Framework 003 * 004 * Copyright (C) 2015 - 2016 Stallion Software LLC. 005 * 006 * This program is free software: you can redistribute it and/or modify it under the terms of the 007 * GNU General Public License as published by the Free Software Foundation, either version 2 of 008 * the License, or (at your option) any later version. This program is distributed in the hope that 009 * it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 010 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 011 * License for more details. You should have received a copy of the GNU General Public License 012 * along with this program. If not, see <http://www.gnu.org/licenses/gpl-2.0.html>. 013 * 014 * 015 * 016 */ 017 018package io.stallion.monitoring; 019 020import com.sun.management.UnixOperatingSystemMXBean; 021import io.stallion.asyncTasks.SimpleAsyncRunner; 022import io.stallion.exceptions.ClientException; 023import io.stallion.requests.StRequest; 024import io.stallion.requests.StResponse; 025import io.stallion.services.Log; 026import io.stallion.settings.Settings; 027import io.stallion.utils.DateUtils; 028import io.stallion.utils.ProcessHelper; 029import io.stallion.utils.GeneralUtils; 030import org.apache.commons.collections4.queue.CircularFifoQueue; 031import org.apache.commons.lang3.concurrent.BasicThreadFactory; 032 033import javax.net.ssl.*; 034import java.io.IOException; 035import java.lang.management.ManagementFactory; 036import java.lang.management.OperatingSystemMXBean; 037import java.lang.reflect.InvocationTargetException; 038import java.net.URL; 039import java.security.cert.Certificate; 040import java.security.SecureRandom; 041import java.security.cert.CertificateException; 042import java.security.cert.X509Certificate; 043import java.time.ZoneId; 044import java.time.ZonedDateTime; 045import java.time.format.DateTimeFormatter; 046import java.util.Date; 047import java.util.concurrent.ScheduledThreadPoolExecutor; 048import java.util.concurrent.TimeUnit; 049import java.util.concurrent.atomic.AtomicInteger; 050 051import static io.stallion.utils.Literals.empty; 052 053 054public class HealthTracker { 055 private CircularFifoQueue<ExceptionInfo> exceptionQueue = new CircularFifoQueue(100); 056 private CircularFifoQueue<MinuteInfo> response500s = new CircularFifoQueue<>(50); 057 private CircularFifoQueue<MinuteInfo> response400s = new CircularFifoQueue<>(50); 058 private CircularFifoQueue<MinuteInfo> response404s = new CircularFifoQueue<>(50); 059 private CircularFifoQueue<MinuteInfo> responseCounts = new CircularFifoQueue<>(50); 060 private ScheduledThreadPoolExecutor timedChecker; 061 private RollingMetrics metrics = new RollingMetrics(); 062 private DailyMetrics dailyMetrics = new DailyMetrics(); 063 private static HealthTracker _instance = new HealthTracker(); 064 065 private HealthTracker() { 066 } 067 068 public static void start() { 069 BasicThreadFactory factory = new BasicThreadFactory.Builder() 070 .namingPattern("stallion-health-tracker-thread-%d") 071 .build(); 072 instance().timedChecker = new ScheduledThreadPoolExecutor(2, factory); 073 instance().timedChecker.scheduleAtFixedRate(instance().metrics, 0, 1, TimeUnit.MINUTES); 074 instance().timedChecker.scheduleAtFixedRate(instance().dailyMetrics, 0, 24*60, TimeUnit.MINUTES); 075 } 076 077 public static HealthTracker instance() { 078 if (_instance == null) { 079 _instance = new HealthTracker(); 080 } 081 return _instance; 082 } 083 084 public static void shutdown() { 085 if (_instance != null) { 086 if (_instance.timedChecker != null) { 087 _instance.timedChecker.shutdown(); 088 } 089 _instance = null; 090 } 091 } 092 093 public Double getAverageSystemCpuLoad() { 094 int periods = 0; 095 Double total = 0.0; 096 for (Double usage: metrics.getSystemCpuUsage()) { 097 total += usage; 098 periods++; 099 } 100 return total / periods; 101 } 102 103 public Double getAverageAppCpuLoad() { 104 int periods = 0; 105 Double total = 0.0; 106 for (Double usage: metrics.getAppCpuUsage()) { 107 total += usage; 108 periods++; 109 } 110 return total / periods; 111 } 112 113 public Double getSwapPages() { 114 int periods = 0; 115 Double total = 0.0; 116 for (Long pages: metrics.getSwapRate()) { 117 total += pages; 118 periods++; 119 } 120 return total / periods; 121 } 122 123 public ZonedDateTime getSslExpires() { 124 return dailyMetrics.getSslExpires(); 125 } 126 127 public boolean getSslExpiresIn7() { 128 return dailyMetrics.isSslExpiresWithin7(); 129 } 130 131 public boolean getSslExpiresIn21() { 132 return dailyMetrics.isSslExpiresWithin21(); 133 } 134 135 public HttpHealthInfo getHttpHealthInfo() { 136 HttpHealthInfo health = new HttpHealthInfo(); 137 health.setError400s(lastTenMinutesCount(response400s)); 138 health.setError500s(lastTenMinutesCount(response500s)); 139 health.setError404s(lastTenMinutesCount(response404s)); 140 health.setRequestCount(lastTenMinutesCount(responseCounts)); 141 return health; 142 } 143 144 public void logException(Throwable e) { 145 if (e instanceof ClientException) { 146 return; 147 } 148 if (e instanceof InvocationTargetException) { 149 if (((InvocationTargetException) e).getTargetException() instanceof ClientException) { 150 return; 151 } 152 } 153 ExceptionInfo info = ExceptionInfo.newForException(e); 154 exceptionQueue.add(info); 155 if (SimpleAsyncRunner.instance() != null && Settings.instance().getEmailErrors() == true) { 156 SimpleAsyncRunner.instance().submit(new ExceptionEmailRunnable(info)); 157 } 158 } 159 160 public int lastTenMinutesCount(CircularFifoQueue<MinuteInfo> queue) { 161 ZonedDateTime tenAgo = MinuteInfo.getCurrentMinute().minusMinutes(10); 162 int count = 0; 163 for (MinuteInfo info: queue) { 164 if (info.getMinute().isBefore(tenAgo)) { 165 continue; 166 } 167 count += info.getCount().get(); 168 } 169 return count; 170 } 171 172 public void logResponse(StRequest request, StResponse response) { 173 incrementQueue(responseCounts); 174 if (response.getStatus() >= 500) { 175 // If the health endpoint is treating us as down, don't log that 176 // as a 500 error or else we will be down for ever 177 if (!request.getPath().startsWith("/st-internal/")) { 178 incrementQueue(response500s); 179 } 180 } else if (response.getStatus() == 404) { 181 incrementQueue(response404s); 182 } else if (response.getStatus() >= 400) { 183 incrementQueue(response400s); 184 } 185 } 186 187 188 public void incrementQueue(CircularFifoQueue<MinuteInfo> queue) { 189 ZonedDateTime now = MinuteInfo.getCurrentMinute(); 190 MinuteInfo minuteInfo = null; 191 if (!queue.isEmpty()) { 192 minuteInfo = queue.get(queue.size()-1); 193 //minuteInfo = queue.get(0); 194 //Log.info("first: {0}", queue.get(0).getMinute()); 195 //Log.info("last: {0}", queue.get(queue.size() -1).getMinute()); 196 //Log.info("now: {0}", now); 197 if (!minuteInfo.getMinute().equals(now)) { 198 //Log.info("Minutes do not matched, prepare for new minute"); 199 minuteInfo = null; 200 } 201 } 202 if (minuteInfo == null) { 203 minuteInfo = new MinuteInfo(); 204 minuteInfo.setMinute(now); 205 queue.add(minuteInfo); 206 } 207 //Log.info("Increment minute {0} {1}", minuteInfo.getMinute().toString(), minuteInfo.getCount().get()); 208 minuteInfo.getCount().incrementAndGet(); 209 } 210 211 212 213 214 public CircularFifoQueue<ExceptionInfo> getExceptionQueue() { 215 return exceptionQueue; 216 } 217 218 public static class MinuteInfo { 219 private ZonedDateTime minute; 220 private AtomicInteger count = new AtomicInteger(0); 221 private static final DateTimeFormatter formatter = DateTimeFormatter.ofPattern("YYYY-MM-dd-HH:mm"); 222 223 public static ZonedDateTime getCurrentMinute() { 224 ZonedDateTime now = DateUtils.utcNow(); 225 return ZonedDateTime.of( 226 now.getYear(), now.getMonth().getValue(), now.getDayOfMonth(), 227 now.getHour(), now.getMinute(), 0, 0, ZoneId.of("UTC")); 228 } 229 230 public AtomicInteger getCount() { 231 return count; 232 } 233 234 public void setCount(AtomicInteger count) { 235 this.count = count; 236 } 237 238 public ZonedDateTime getMinute() { 239 return minute; 240 } 241 242 public void setMinute(ZonedDateTime minute) { 243 this.minute = minute; 244 } 245 } 246 247 public static class DailyMetrics implements Runnable { 248 private double ntpOffset = 0; 249 private ZonedDateTime sslExpires = null; 250 private boolean sslExpiresWithin21 = false; 251 private boolean sslExpiresWithin7 = false; 252 253 254 public void run() { 255 try { 256 if (Settings.instance().getSiteUrl().startsWith("https")) { 257 checkSslExpiration(); 258 } 259 } catch(Exception e) { 260 Log.exception(e, "Error checking SSL"); 261 } 262 263 } 264 265 public void checkSslExpiration() throws Exception { 266 // configure the SSLContext with a TrustManager 267 SSLContext ctx = SSLContext.getInstance("TLS"); 268 ctx.init(new KeyManager[0], new TrustManager[] {new DefaultTrustManager()}, new SecureRandom()); 269 SSLContext.setDefault(ctx); 270 URL url = new URL(Settings.instance().getSiteUrl()); 271 HttpsURLConnection conn = (HttpsURLConnection) url.openConnection(); 272 conn.setHostnameVerifier(new HostnameVerifier() { 273 @Override 274 public boolean verify(String arg0, SSLSession arg1) { 275 return true; 276 } 277 }); 278 System.out.println(conn.getResponseCode()); 279 Certificate[] certs = conn.getServerCertificates(); 280 Date maxDate = new Date(Long.MAX_VALUE); 281 for (Certificate cert :certs){ 282 X509Certificate xCert = (X509Certificate)cert; 283 if (xCert.getNotAfter().before(maxDate)) { 284 maxDate = xCert.getNotAfter(); 285 } 286 } 287 288 setSslExpires(ZonedDateTime.ofInstant(maxDate.toInstant(), GeneralUtils.UTC)); 289 setSslExpiresWithin21(DateUtils.utcNow().plusDays(21).isAfter(getSslExpires())); 290 setSslExpiresWithin7(DateUtils.utcNow().plusDays(7).isAfter(getSslExpires())); 291 conn.disconnect(); 292 } 293 294 295 public double getNtpOffset() { 296 return ntpOffset; 297 } 298 299 public void setNtpOffset(double ntpOffset) { 300 this.ntpOffset = ntpOffset; 301 } 302 303 public ZonedDateTime getSslExpires() { 304 return sslExpires; 305 } 306 307 public void setSslExpires(ZonedDateTime sslExpires) { 308 this.sslExpires = sslExpires; 309 } 310 311 public boolean isSslExpiresWithin21() { 312 return sslExpiresWithin21; 313 } 314 315 public DailyMetrics setSslExpiresWithin21(boolean sslExpiresWithin21) { 316 this.sslExpiresWithin21 = sslExpiresWithin21; 317 return this; 318 } 319 320 public boolean isSslExpiresWithin7() { 321 return sslExpiresWithin7; 322 } 323 324 public DailyMetrics setSslExpiresWithin7(boolean sslExpiresWithin7) { 325 this.sslExpiresWithin7 = sslExpiresWithin7; 326 return this; 327 } 328 } 329 330 331 private static class DefaultTrustManager implements X509TrustManager { 332 333 @Override 334 public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {} 335 336 @Override 337 public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {} 338 339 @Override 340 public X509Certificate[] getAcceptedIssuers() { 341 return null; 342 } 343 } 344 345 public static class RollingMetrics implements Runnable { 346 private CircularFifoQueue<Double> systemCpuUsage = new CircularFifoQueue<>(5); 347 private CircularFifoQueue<Double> appCpuUsage = new CircularFifoQueue<>(5); 348 private CircularFifoQueue<Long> swapRate = new CircularFifoQueue<>(5); 349 private boolean vmstatAvailable = true; 350 351 public void run() { 352 OperatingSystemMXBean os = ManagementFactory.getOperatingSystemMXBean(); 353 if(os instanceof UnixOperatingSystemMXBean){ 354 UnixOperatingSystemMXBean unixBean = (UnixOperatingSystemMXBean) os; 355 getAppCpuUsage().add(unixBean.getProcessCpuLoad()); 356 getSystemCpuUsage().add(unixBean.getSystemLoadAverage()); 357 } 358 if (Settings.instance().getEnv().equals("local") || Settings.instance().getDevMode()) { 359 vmstatAvailable = false; 360 } 361 if (vmstatAvailable) { 362 Runtime rt = Runtime.getRuntime(); 363 int exitVal = 1; 364 Process proc = null; 365 try { 366 proc = rt.exec("vmstat"); 367 exitVal = proc.exitValue(); 368 } catch (IOException e) { 369 } 370 if (exitVal != 0) { 371 // Don't check for vmstat again 372 Log.warn("vmstat executable not found, skipping monitoring of swap rate"); 373 vmstatAvailable = false; 374 } else { 375 ProcessHelper.CommandResult result = ProcessHelper.run("vmstat", "4", "2"); 376 if (result.succeeded()) { 377 String[] lines = result.getOut().split("\n"); 378 String lastLine = lines[lines.length - 1]; 379 if (empty(lastLine.trim())) { 380 lastLine = lines[lines.length - 2]; 381 } 382 String[] stats = lastLine.trim().trim().replaceAll("\\s+", "\t").split("\t"); 383 Long swapIn = Long.parseLong(stats[6]); 384 Long swapOut = Long.parseLong(stats[7]); 385 getSwapRate().add(swapIn + swapOut); 386 } 387 } 388 } 389 390 391 } 392 393 394 public CircularFifoQueue<Double> getSystemCpuUsage() { 395 return systemCpuUsage; 396 } 397 398 public void setSystemCpuUsage(CircularFifoQueue<Double> systemCpuUsage) { 399 this.systemCpuUsage = systemCpuUsage; 400 } 401 402 public CircularFifoQueue<Double> getAppCpuUsage() { 403 return appCpuUsage; 404 } 405 406 public void setAppCpuUsage(CircularFifoQueue<Double> appCpuUsage) { 407 this.appCpuUsage = appCpuUsage; 408 } 409 410 public CircularFifoQueue<Long> getSwapRate() { 411 return swapRate; 412 } 413 414 public void setSwapRate(CircularFifoQueue<Long> swapRate) { 415 this.swapRate = swapRate; 416 } 417 } 418}