#!/usr/bin/env python
# Based on the command line:
# $ awk '$1 ~ /^201[1-9],/ {n = $2$3} !($1 == "2017,") && ($1 != "") {++m[n]} END {for (i in m) {print i, m[i]}}' ~/docs/log | awk '{printf($1" ");for (i=$2;i>0;i--) {printf("-")} print ""}' | sort

from collections import defaultdict
from os import path
import re
import datetime


# Path of the log file to read; "$HOME" is expanded from the environment
# once, when this module is loaded.
LOGFILE = path.expandvars("$HOME/docs/log")


def main():
    """Load the log stored at LOGFILE and print its per-day histogram."""
    histogram(loadlog(LOGFILE))


def loadlog(logfile):
    """Parse a dated log file into a mapping of date -> list of entries.

    A line that starts with digits followed by a comma is treated as a date
    header and parsed with the format "%Y, %B %d" (for example
    "2017, January 05"). Every following non-blank line, stripped of
    surrounding whitespace, is stored as an entry under the most recent
    header. Non-blank lines that appear before the first header have no
    date to be filed under and are ignored.

    Args:
        logfile: Path of the log file to read.

    Returns:
        A ``defaultdict(list)`` keyed by the ``datetime.datetime`` parsed
        from each header; each value is the list of entry lines recorded
        under that header, in file order.

    Raises:
        ValueError: If a line looks like a date header (digits then a comma)
            but does not match the "%Y, %B %d" format.
    """
    log = defaultdict(list)
    date_re = re.compile(r'\d+,')
    # No header seen yet; without this, an entry preceding the first header
    # would hit an unbound local variable.
    date = None

    with open(logfile) as logf:
        # Iterate the file lazily instead of materializing every line.
        for line in logf:
            line = line.strip()
            if not line:
                continue

            if date_re.match(line):
                date = datetime.datetime.strptime(line, "%Y, %B %d")
                continue

            if date is None:
                # Preamble before the first date header: nothing to attach to.
                continue

            log[date].append(line)

    return log


def daterange(start_date, end_date):
    """Yield one value per day from start_date towards end_date, inclusive.

    Steps forward a day at a time when start_date <= end_date and backward
    otherwise. Only whole days of the difference are counted, and every
    yielded value is start_date shifted by a whole number of days.
    """
    ascending = start_date <= end_date
    if ascending:
        total_days = (end_date - start_date).days
    else:
        total_days = (start_date - end_date).days

    # +1 walks forward in time, -1 walks backward.
    direction = 1 if ascending else -1
    for offset in range(total_days + 1):
        yield start_date + datetime.timedelta(days=direction * offset)


def histogram(log, start=None, end=None):
    """Print one row per day with a dash for every log entry on that day.

    Each row is the day formatted as "%Y, %a %b %d", a space, and one "-"
    per entry stored under that day in ``log``. A blank line is printed
    before the first row and before every seventh row after it, so rows
    appear in groups of seven counted from ``start``.

    Args:
        log: Mapping from day (the same values that daterange yields for
            ``start``/``end``) to a sized collection of entries. Days
            missing from the mapping are printed with an empty bar; the
            mapping itself is not modified.
        start: First day to print. Defaults to January 1, 2017.
        end: Last day to print. Defaults to the current local date and time.
    """
    if start is None:
        start = datetime.datetime(year=2017, month=1, day=1)
    if end is None:
        end = datetime.datetime.today()

    for index, day in enumerate(daterange(start, end)):
        if index % 7 == 0:
            # Single-argument print(...) emits the same text on Python 2
            # (print statement) and Python 3 (print function).
            print("")
        # .get() avoids inserting empty lists into a defaultdict for every
        # day that has no entries.
        bar = len(log.get(day, ())) * '-'
        print("%s %s" % (day.strftime("%Y, %a %b %d"), bar))


# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()