001/*
002 * Stallion Core: A Modern Web Framework
003 *
004 * Copyright (C) 2015 - 2016 Stallion Software LLC.
005 *
006 * This program is free software: you can redistribute it and/or modify it under the terms of the
007 * GNU General Public License as published by the Free Software Foundation, either version 2 of
008 * the License, or (at your option) any later version. This program is distributed in the hope that
009 * it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
010 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
011 * License for more details. You should have received a copy of the GNU General Public License
012 * along with this program.  If not, see <http://www.gnu.org/licenses/gpl-2.0.html>.
013 *
014 *
015 *
016 */
017
018package io.stallion.monitoring;
019
020import com.sun.management.UnixOperatingSystemMXBean;
021import io.stallion.asyncTasks.SimpleAsyncRunner;
022import io.stallion.exceptions.ClientException;
023import io.stallion.requests.StRequest;
024import io.stallion.requests.StResponse;
025import io.stallion.services.Log;
026import io.stallion.settings.Settings;
027import io.stallion.utils.DateUtils;
028import io.stallion.utils.ProcessHelper;
029import io.stallion.utils.GeneralUtils;
030import org.apache.commons.collections4.queue.CircularFifoQueue;
031import org.apache.commons.lang3.concurrent.BasicThreadFactory;
032
033import javax.net.ssl.*;
034import java.io.IOException;
035import java.lang.management.ManagementFactory;
036import java.lang.management.OperatingSystemMXBean;
037import java.lang.reflect.InvocationTargetException;
038import java.net.URL;
039import java.security.cert.Certificate;
040import java.security.SecureRandom;
041import java.security.cert.CertificateException;
042import java.security.cert.X509Certificate;
043import java.time.ZoneId;
044import java.time.ZonedDateTime;
045import java.time.format.DateTimeFormatter;
046import java.util.Date;
047import java.util.concurrent.ScheduledThreadPoolExecutor;
048import java.util.concurrent.TimeUnit;
049import java.util.concurrent.atomic.AtomicInteger;
050
051import static io.stallion.utils.Literals.empty;
052
053
054public class HealthTracker {
055    private CircularFifoQueue<ExceptionInfo> exceptionQueue = new CircularFifoQueue(100);
056    private CircularFifoQueue<MinuteInfo> response500s = new CircularFifoQueue<>(50);
057    private CircularFifoQueue<MinuteInfo> response400s = new CircularFifoQueue<>(50);
058    private CircularFifoQueue<MinuteInfo> response404s = new CircularFifoQueue<>(50);
059    private CircularFifoQueue<MinuteInfo> responseCounts = new CircularFifoQueue<>(50);
060    private ScheduledThreadPoolExecutor timedChecker;
061    private RollingMetrics metrics = new RollingMetrics();
062    private DailyMetrics dailyMetrics = new DailyMetrics();
063    private static HealthTracker _instance = new HealthTracker();
064
065    private HealthTracker() {
066    }
067
068    public static void start() {
069        BasicThreadFactory factory = new BasicThreadFactory.Builder()
070                .namingPattern("stallion-health-tracker-thread-%d")
071                .build();
072        instance().timedChecker = new ScheduledThreadPoolExecutor(2, factory);
073        instance().timedChecker.scheduleAtFixedRate(instance().metrics, 0, 1, TimeUnit.MINUTES);
074        instance().timedChecker.scheduleAtFixedRate(instance().dailyMetrics, 0, 24*60, TimeUnit.MINUTES);
075    }
076
077    public static HealthTracker instance() {
078        if (_instance == null) {
079            _instance = new HealthTracker();
080        }
081        return _instance;
082    }
083
084    public static void shutdown() {
085        if (_instance != null) {
086            if (_instance.timedChecker != null) {
087                _instance.timedChecker.shutdown();
088            }
089            _instance = null;
090        }
091    }
092
093    public Double getAverageSystemCpuLoad() {
094        int periods = 0;
095        Double total = 0.0;
096        for (Double usage: metrics.getSystemCpuUsage()) {
097            total += usage;
098            periods++;
099        }
100        return total / periods;
101    }
102
103    public Double getAverageAppCpuLoad() {
104        int periods = 0;
105        Double total = 0.0;
106        for (Double usage: metrics.getAppCpuUsage()) {
107            total += usage;
108            periods++;
109        }
110        return total / periods;
111    }
112
113    public Double getSwapPages() {
114        int periods = 0;
115        Double total = 0.0;
116        for (Long pages: metrics.getSwapRate()) {
117            total += pages;
118            periods++;
119        }
120        return total / periods;
121    }
122
123    public ZonedDateTime getSslExpires() {
124        return dailyMetrics.getSslExpires();
125    }
126
127    public boolean getSslExpiresIn7() {
128        return dailyMetrics.isSslExpiresWithin7();
129    }
130
131    public boolean getSslExpiresIn21() {
132        return dailyMetrics.isSslExpiresWithin21();
133    }
134
135    public HttpHealthInfo getHttpHealthInfo() {
136        HttpHealthInfo health = new HttpHealthInfo();
137        health.setError400s(lastTenMinutesCount(response400s));
138        health.setError500s(lastTenMinutesCount(response500s));
139        health.setError404s(lastTenMinutesCount(response404s));
140        health.setRequestCount(lastTenMinutesCount(responseCounts));
141        return health;
142    }
143
144    public void logException(Throwable e) {
145        if (e instanceof ClientException) {
146            return;
147        }
148        if (e instanceof InvocationTargetException) {
149            if (((InvocationTargetException) e).getTargetException() instanceof ClientException) {
150                return;
151            }
152        }
153        ExceptionInfo info = ExceptionInfo.newForException(e);
154        exceptionQueue.add(info);
155        if (SimpleAsyncRunner.instance() != null && Settings.instance().getEmailErrors() == true) {
156            SimpleAsyncRunner.instance().submit(new ExceptionEmailRunnable(info));
157        }
158    }
159
160    public int lastTenMinutesCount(CircularFifoQueue<MinuteInfo> queue) {
161        ZonedDateTime tenAgo = MinuteInfo.getCurrentMinute().minusMinutes(10);
162        int count = 0;
163        for (MinuteInfo info: queue) {
164            if (info.getMinute().isBefore(tenAgo)) {
165                continue;
166            }
167            count += info.getCount().get();
168        }
169        return count;
170    }
171
172    public void logResponse(StRequest request, StResponse response) {
173        incrementQueue(responseCounts);
174        if (response.getStatus() >= 500) {
175            // If the health endpoint is treating us as down, don't log that
176            // as a 500 error or else we will be down for ever
177            if (!request.getPath().startsWith("/st-internal/")) {
178                incrementQueue(response500s);
179            }
180        } else if (response.getStatus() == 404) {
181            incrementQueue(response404s);
182        } else if (response.getStatus() >= 400) {
183            incrementQueue(response400s);
184        }
185    }
186
187
188    public void incrementQueue(CircularFifoQueue<MinuteInfo> queue) {
189        ZonedDateTime now = MinuteInfo.getCurrentMinute();
190        MinuteInfo minuteInfo = null;
191        if (!queue.isEmpty()) {
192            minuteInfo = queue.get(queue.size()-1);
193            //minuteInfo = queue.get(0);
194            //Log.info("first: {0}", queue.get(0).getMinute());
195            //Log.info("last:  {0}", queue.get(queue.size() -1).getMinute());
196            //Log.info("now:   {0}", now);
197            if (!minuteInfo.getMinute().equals(now)) {
198                //Log.info("Minutes do not matched, prepare for new minute");
199                minuteInfo = null;
200            }
201        }
202        if (minuteInfo == null) {
203            minuteInfo = new MinuteInfo();
204            minuteInfo.setMinute(now);
205            queue.add(minuteInfo);
206        }
207        //Log.info("Increment minute {0} {1}", minuteInfo.getMinute().toString(), minuteInfo.getCount().get());
208        minuteInfo.getCount().incrementAndGet();
209    }
210
211
212
213
214    public CircularFifoQueue<ExceptionInfo> getExceptionQueue() {
215        return exceptionQueue;
216    }
217
218    public static class MinuteInfo {
219        private ZonedDateTime minute;
220        private AtomicInteger count = new AtomicInteger(0);
221        private static final DateTimeFormatter formatter = DateTimeFormatter.ofPattern("YYYY-MM-dd-HH:mm");
222
223        public static ZonedDateTime getCurrentMinute() {
224            ZonedDateTime now = DateUtils.utcNow();
225            return ZonedDateTime.of(
226                    now.getYear(), now.getMonth().getValue(), now.getDayOfMonth(),
227                    now.getHour(), now.getMinute(), 0, 0, ZoneId.of("UTC"));
228        }
229
230        public AtomicInteger getCount() {
231            return count;
232        }
233
234        public void setCount(AtomicInteger count) {
235            this.count = count;
236        }
237
238        public ZonedDateTime getMinute() {
239            return minute;
240        }
241
242        public void setMinute(ZonedDateTime minute) {
243            this.minute = minute;
244        }
245    }
246
247    public static class DailyMetrics implements Runnable {
248        private double ntpOffset = 0;
249        private ZonedDateTime sslExpires = null;
250        private boolean sslExpiresWithin21 = false;
251        private boolean sslExpiresWithin7 = false;
252
253
254        public void run() {
255            try {
256                if (Settings.instance().getSiteUrl().startsWith("https")) {
257                    checkSslExpiration();
258                }
259            } catch(Exception e) {
260                Log.exception(e, "Error checking SSL");
261            }
262
263        }
264
265        public void checkSslExpiration() throws Exception {
266            // configure the SSLContext with a TrustManager
267            SSLContext ctx = SSLContext.getInstance("TLS");
268            ctx.init(new KeyManager[0], new TrustManager[] {new DefaultTrustManager()}, new SecureRandom());
269            SSLContext.setDefault(ctx);
270            URL url = new URL(Settings.instance().getSiteUrl());
271            HttpsURLConnection conn = (HttpsURLConnection) url.openConnection();
272            conn.setHostnameVerifier(new HostnameVerifier() {
273                @Override
274                public boolean verify(String arg0, SSLSession arg1) {
275                    return true;
276                }
277            });
278            System.out.println(conn.getResponseCode());
279            Certificate[] certs = conn.getServerCertificates();
280            Date maxDate = new Date(Long.MAX_VALUE);
281            for (Certificate cert :certs){
282                X509Certificate xCert = (X509Certificate)cert;
283                if (xCert.getNotAfter().before(maxDate)) {
284                    maxDate = xCert.getNotAfter();
285                }
286            }
287
288            setSslExpires(ZonedDateTime.ofInstant(maxDate.toInstant(), GeneralUtils.UTC));
289            setSslExpiresWithin21(DateUtils.utcNow().plusDays(21).isAfter(getSslExpires()));
290            setSslExpiresWithin7(DateUtils.utcNow().plusDays(7).isAfter(getSslExpires()));
291            conn.disconnect();
292        }
293
294
295        public double getNtpOffset() {
296            return ntpOffset;
297        }
298
299        public void setNtpOffset(double ntpOffset) {
300            this.ntpOffset = ntpOffset;
301        }
302
303        public ZonedDateTime getSslExpires() {
304            return sslExpires;
305        }
306
307        public void setSslExpires(ZonedDateTime sslExpires) {
308            this.sslExpires = sslExpires;
309        }
310
311        public boolean isSslExpiresWithin21() {
312            return sslExpiresWithin21;
313        }
314
315        public DailyMetrics setSslExpiresWithin21(boolean sslExpiresWithin21) {
316            this.sslExpiresWithin21 = sslExpiresWithin21;
317            return this;
318        }
319
320        public boolean isSslExpiresWithin7() {
321            return sslExpiresWithin7;
322        }
323
324        public DailyMetrics setSslExpiresWithin7(boolean sslExpiresWithin7) {
325            this.sslExpiresWithin7 = sslExpiresWithin7;
326            return this;
327        }
328    }
329
330
331    private static class DefaultTrustManager implements X509TrustManager {
332
333        @Override
334        public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {}
335
336        @Override
337        public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {}
338
339        @Override
340        public X509Certificate[] getAcceptedIssuers() {
341            return null;
342        }
343    }
344
345    public static class RollingMetrics implements Runnable {
346        private CircularFifoQueue<Double> systemCpuUsage = new CircularFifoQueue<>(5);
347        private CircularFifoQueue<Double> appCpuUsage = new CircularFifoQueue<>(5);
348        private CircularFifoQueue<Long> swapRate = new CircularFifoQueue<>(5);
349        private boolean vmstatAvailable = true;
350
351        public void run() {
352            OperatingSystemMXBean os = ManagementFactory.getOperatingSystemMXBean();
353            if(os instanceof UnixOperatingSystemMXBean){
354                UnixOperatingSystemMXBean unixBean = (UnixOperatingSystemMXBean) os;
355                getAppCpuUsage().add(unixBean.getProcessCpuLoad());
356                getSystemCpuUsage().add(unixBean.getSystemLoadAverage());
357            }
358            if (Settings.instance().getEnv().equals("local") || Settings.instance().getDevMode()) {
359                vmstatAvailable = false;
360            }
361            if (vmstatAvailable) {
362                Runtime rt = Runtime.getRuntime();
363                int exitVal = 1;
364                Process proc = null;
365                try {
366                    proc = rt.exec("vmstat");
367                    exitVal = proc.exitValue();
368                } catch (IOException e) {
369                }
370                if (exitVal != 0) {
371                    // Don't check for vmstat again
372                    Log.warn("vmstat executable not found, skipping monitoring of swap rate");
373                    vmstatAvailable = false;
374                } else {
375                    ProcessHelper.CommandResult result = ProcessHelper.run("vmstat", "4", "2");
376                    if (result.succeeded()) {
377                        String[] lines = result.getOut().split("\n");
378                        String lastLine = lines[lines.length - 1];
379                        if (empty(lastLine.trim())) {
380                            lastLine = lines[lines.length - 2];
381                        }
382                        String[] stats = lastLine.trim().trim().replaceAll("\\s+", "\t").split("\t");
383                        Long swapIn = Long.parseLong(stats[6]);
384                        Long swapOut = Long.parseLong(stats[7]);
385                        getSwapRate().add(swapIn + swapOut);
386                    }
387                }
388            }
389
390
391        }
392
393
394        public CircularFifoQueue<Double> getSystemCpuUsage() {
395            return systemCpuUsage;
396        }
397
398        public void setSystemCpuUsage(CircularFifoQueue<Double> systemCpuUsage) {
399            this.systemCpuUsage = systemCpuUsage;
400        }
401
402        public CircularFifoQueue<Double> getAppCpuUsage() {
403            return appCpuUsage;
404        }
405
406        public void setAppCpuUsage(CircularFifoQueue<Double> appCpuUsage) {
407            this.appCpuUsage = appCpuUsage;
408        }
409
410        public CircularFifoQueue<Long> getSwapRate() {
411            return swapRate;
412        }
413
414        public void setSwapRate(CircularFifoQueue<Long> swapRate) {
415            this.swapRate = swapRate;
416        }
417    }
418}