Tags: #python #logs
"""
Example:
python logs.py -i "10 seconds ago"
python logs.py -i "10 seconds ago" -x "now"
python logs.py -i "10 seconds ago" -x "now" -u "obiwan"
python logs.py --min "today at 9.20" --max "today at 10.20" --upstream "obiwan"
""" # noqa
import argparse
import re
import subprocess
from collections import Counter
from datetime import datetime

parser = argparse.ArgumentParser(description="Process Site Router - nginx - Logs")
parser.add_argument("-i", "--min", type=str, required=True, help="When to start getting logs") # noqa
parser.add_argument("-x", "--max", type=str, default="now", help="When to stop getting logs") # noqa
parser.add_argument("-u", "--upstream", type=str, default="bpager", help="Filter logs by this upstream") # noqa
args = parser.parse_args()
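
# The --min/--max values are handed straight to the papertrail CLI, which
# accepts human-readable time expressions such as "10 seconds ago" or
# "today at 9.20" (see the examples in the module docstring).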


def upstreams(logs):
    """Return the sorted set of upstream names found in the log lines."""
    result = set()
    for line in logs:
        match = re.search(r"upstream \(([^:]+)", line)
        if match:
            result.add(match.group(1))
    return sorted(result)
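
# For example, a line containing "upstream (bpager:..." yields "bpager": the
# pattern captures everything between "upstream (" and the first colon.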


def filter_logs_by_upstream(logs, upstream):
    """Keep only the log lines that were routed to the given upstream."""
    log_lines_keep = []
    for line in logs:
        result = re.search(r"upstream \(([^:]+)", line)
        if result and result.group(1) == upstream:
            log_lines_keep.append(line)
    return log_lines_keep


def filter_logs_by_gets_that_were_errors(logs):
    """Keep only the GET requests that resulted in a 4xx/5xx response."""
    log_lines_gets = []
    for line in logs:
        result = re.search(r"GET /.+ HTTP/1\.1 [45][0-9]{2}", line)
        if result:
            log_lines_gets.append(line)
    return log_lines_gets
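
# A line containing, say, "GET /missing HTTP/1.1 404" (hypothetical path) would
# match; 2xx/3xx responses and non-GET requests are dropped.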


def parse_log_urls(logs):
    """Extract the request + status fragment from each error line."""
    if not logs:
        print("no 4xx/5xx errors in the provided log data time period")
        return []
    log_processed = []
    for line in logs:
        result = re.search(r"(GET /.+ HTTP/1\.1 [45][0-9]{2})", line)
        if result:
            log_processed.append(result.group(1))
    return sorted(log_processed)
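
# Only the "GET <path> HTTP/1.1 <status>" fragment is kept, so lines that
# differ only in timestamp or host collapse into the same Counter key below.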


def timeit(fn):
    """Report how long fn took, plus summary stats about the returned logs."""
    def helper(*args, **kwargs):
        start = datetime.now()
        print("log retrieval started:", start)
        logs = fn(*args, **kwargs)
        finish = datetime.now()
        print(f"log retrieval finished: {finish}\n")
        seconds = (finish - start).total_seconds()
        time_taken = f"{round(seconds / 60)}mins" if seconds >= 60 else f"{round(seconds)}s"  # noqa
        print(f"time taken to retrieve the logs: {time_taken}\n")
        print(f"number of log lines: {len(logs)}")
        print("upstreams found within the logs:\n")
        for upstream in upstreams(logs):
            print(f"\t{upstream}")
        print("\n")
        return logs
    return helper


@timeit
def process():
    """Shell out to the papertrail CLI and return the raw log lines."""
    command = [
        "papertrail",
        "-g",
        "rig-web-public",
        # subprocess passes list items verbatim (no shell), so the time values
        # must not be wrapped in extra quotes
        f"--min-time={args.min}",
        f"--max-time={args.max}",
        "program:web-public/site_router/",
    ]
    output = subprocess.run(command, stdout=subprocess.PIPE)
    logs = output.stdout.decode("utf-8").splitlines()
    return logs
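
# NOTE: this assumes the papertrail CLI is installed and authenticated; "-g"
# selects the log group and the trailing "program:..." term is a papertrail
# search query. Adjust both for your own setup.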

# Acquire a set of logs for the specified time period
logs = process()

# Filter logs so we only keep those for a specific upstream
logs_keep = filter_logs_by_upstream(logs, args.upstream)
print(f"logs we're keeping (upstream: {args.upstream}): {len(logs_keep)}")

# Filter logs so we only keep 4xx/5xx GET requests
logs_gets = filter_logs_by_gets_that_were_errors(logs_keep)
print(f"logs after filtering for 4xx/5xx GET requests: {len(logs_gets)}\n")

# Count the extracted request lines and report the ten most common
result_for_parse_log_urls = parse_log_urls(logs_gets)
count = Counter(result_for_parse_log_urls)
for url, total in count.most_common(10):
    print(f"{url}\ncount: {total}\n")