#!/usr/bin/awk -f
#
# Copyright (C) 2004 Josip Deanovic <djosip@linuxpages.org>
# This program is copyrighted under GPL licence. See COPYING file
# for details.


BEGIN {
      # getopt() state: report unknown options and start scanning at ARGV[1].
      Opterr = 1
      Optind = 1

      # Walk the command line one option letter at a time. Anything that is
      # not a recognised option carrying an acceptable argument ends in the
      # usage text and a non-zero exit status.
      while ((optbuff = getopt(ARGC, ARGV, "Vhl:rm:i:s:")) != -1) {
            if (optbuff == "V") {
               print ("alco: Alco 1.1 Josip Deanovic <djosip@linuxpages.org>") > "/dev/stderr"
               exit 0
            } else if (optbuff == "h") {
               help()
               exit 0
            } else if (optbuff == "l" && (Optarg == "common" || Optarg == "combined")) {
               LOGFORMAT = Optarg
            } else if (optbuff == "r") {
               REFERRERS = "1"
            } else if (optbuff == "m" && Optarg != "") {
               SHOW_MATCHED = Optarg
            } else if (optbuff == "i" && Optarg != "") {
               INVERT_MATCHED = Optarg
            } else if (optbuff == "s" && Optarg != "") {
               MATCHED_HTTP_STATUS_CODES = Optarg
            } else {
               help()
               exit 1
            }
      }

      # Blank out every argument getopt() consumed; awk skips empty ARGV
      # elements when it looks for input files.
      counter = 1
      while (counter < Optind) {
            ARGV[counter] = ""
            counter++
      }

      # Foreground colors
      RED      = "\033[0;31m"
      GREEN    = "\033[0;32m"
      YELLOW   = "\033[0;33m"
      BLUE     = "\033[0;34m"
      MAGENTA  = "\033[0;35m"
      CYAN     = "\033[0;36m"
      WHITE    = "\033[0;37m"
      DEFAULT  = "\033[0;39m"

      # Bold foreground colors
      BRED     = "\033[1;31m"
      BGREEN   = "\033[1;32m"
      BYELLOW  = "\033[1;33m"
      BBLUE    = "\033[1;34m"
      BMAGENTA = "\033[1;35m"
      BCYAN    = "\033[1;36m"
      BWHITE   = "\033[1;37m"

      # Inverted foreground colors
      IRED     = "\033[7;31m"
      IGREEN   = "\033[7;32m"
      IYELLOW  = "\033[7;33m"
      IBLUE    = "\033[7;34m"
      IMAGENTA = "\033[7;35m"
      ICYAN    = "\033[7;36m"
      IWHITE   = "\033[7;37m"
      IDEFAULT = "\033[7;39m"
      }


# Print the usage screen on standard error. Takes no arguments and returns
# nothing; the caller decides the exit status.
# Tabs in the option table are written as \t escapes so the layout survives
# editors that convert tabs to spaces; the emitted bytes are unchanged.
function help ()
              {
              print ("Alco 1.1 - Apache log colorizing tool") > "/dev/stderr"
              print ("Usage: alco <awk_options> [--] <options> [<filename>]") > "/dev/stderr"
              print ("       tail -f filename | alco") > "/dev/stderr"
              print ("\t--\t\tthis should go before non awk options") > "/dev/stderr"
              print ("\t--re-interval\tthis is an awk option. When used, it enables intervals") > "/dev/stderr"
              print ("\t\t\tin awk and script will be able to recognize log format") > "/dev/stderr"
              print ("\t-l format\tdefine the format of your log file (either common or") > "/dev/stderr"
              print ("\t\t\tcombined), default is common") > "/dev/stderr"
              print ("\t-m pattern\tonly show lines that match first field with pattern") > "/dev/stderr"
              print ("\t-i pattern\tshow matched vhosts/IPs with inverted color") > "/dev/stderr"
              print ("\t-s pattern\tshow matched (comma delimited) HTTP status codes in") > "/dev/stderr"
              print ("\t\t\tbold red color") > "/dev/stderr"
              print ("\t-r\t\tshow referrer field") > "/dev/stderr"
              print ("\t-V\t\toutput version and exit") > "/dev/stderr"
              print ("\t-h\t\tshow this help screen and exit") > "/dev/stderr"
              print ("\"--re-interval\" is an awk option and it should go before \"--\".") > "/dev/stderr"
              }


# Command line option scanner taken from the gawk documentation (it also
# ships with gawk as getopt.awk). It is embedded here because the installed
# location of getopt.awk differs between distributions.
#
# Arguments:
#   argc, argv  - argument count and vector (argc itself is not consulted)
#   options     - accepted option letters; a letter followed by ":" takes
#                 an argument
#   thisopt, i  - local variables, not supplied by callers
#
# Globals:
#   Optind  - index of the argv element being scanned (read and advanced)
#   Optarg  - argument of the returned option, "" when it takes none
#   Optopt  - the option letter most recently examined
#   Opterr  - when true, unknown letters are reported on /dev/stderr
#   _opti   - private position inside a bundle such as "-rh" (0 = start of
#             a fresh argv element)
#
# Returns the option letter, "?" for a letter missing from options, or -1
# once the options are exhausted ("--", a non-option word, or an empty
# options string).
function getopt (argc, argv, options, thisopt, i)
{
    if (length(options) == 0)
        return -1

    # An explicit "--" is consumed and ends option processing.
    if (argv[Optind] == "--") {
        Optind++
        _opti = 0
        return -1
    }

    # Anything that does not look like "-x" is left for the caller.
    if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) {
        _opti = 0
        return -1
    }

    # Position 2 is the first letter after the leading dash.
    if (_opti == 0)
        _opti = 2

    thisopt = substr(argv[Optind], _opti, 1)
    Optopt = thisopt
    i = index(options, thisopt)

    # Unknown letter: optionally complain, step past it, report "?".
    if (i == 0) {
        if (Opterr)
            printf ("%c -- invalid option\n", thisopt) > "/dev/stderr"

        if (_opti < length(argv[Optind]))
            _opti++
        else {
            Optind++
            _opti = 0
        }

        return "?"
    }

    # Letter with an argument: use the rest of this word when there is one
    # ("-mfoo"), otherwise the following argv element ("-m foo"). Either
    # way scanning resumes at the start of the next element.
    if (substr(options, i + 1, 1) == ":") {
        if (length(substr(argv[Optind], _opti + 1)) > 0)
            Optarg = substr(argv[Optind], _opti + 1)
        else
            Optarg = argv[++Optind]

        Optind++
        _opti = 0
        return thisopt
    }

    # Plain flag: stay inside the bundle while letters remain in this word.
    Optarg = ""
    if (_opti < length(argv[Optind]))
        _opti++
    else {
        Optind++
        _opti = 0
    }

    return thisopt
}


# Write MESSAGE to standard output wrapped in the COLOR escape sequence and
# followed by the DEFAULT sequence. The text is emitted as a side effect and
# nothing is returned, so a call used as a printf argument contributes an
# empty string after its own output has already been written.
function setcolor (COLOR, MESSAGE)
{
    printf "%s%s%s", COLOR, MESSAGE, DEFAULT
}


# Private helper: report whether CODE is one of the comma-delimited HTTP
# status codes stored in MATCHED_HTTP_STATUS_CODES (option -s). The test is
# an exact, literal comparison against each list entry (blanks in the list
# are ignored), so log data is never interpreted as a regular expression
# and an empty field never counts as a match.
function lf_status_selected (code,    wanted)
{
    if (code == "" || MATCHED_HTTP_STATUS_CODES == "")
        return 0

    wanted = MATCHED_HTTP_STATUS_CODES
    gsub(/[ \t]+/, "", wanted)
    return (index("," wanted ",", "," code ",") > 0)
}


# Private helper: print the status field. Codes selected with -s are bold
# red, other 4xx codes bold white, other 5xx codes bold yellow; everything
# else (including non-numeric fields) is printed unchanged.
function lf_print_status (code,    class)
{
    class = (code ~ /^[0-9]+$/) ? int(code / 100) : 0

    if (lf_status_selected(code))
        printf("%s ", setcolor(BRED, code))
    else if (class == 4)
        printf("%s ", setcolor(BWHITE, code))
    else if (class == 5)
        printf("%s ", setcolor(BYELLOW, code))
    else
        printf("%s ", code)
}


# Private helper: print the request method field, highlighting in yellow
# every method field that does not contain "GET".
function lf_print_method (method)
{
    if (match(method, /GET/) == 0)
        printf("%s ", setcolor(YELLOW, method))
    else
        printf("%s ", method)
}


# Private helper: choose the color for a requested URL from its lowercased
# text. Returns "" when the URL should be printed without color. The tests
# run in a fixed order and the first hit wins.
function lf_url_color (url,    u)
{
    u = tolower(url)

    if (u ~ /\/$/)
        return BLUE
    if (u ~ /\.(jpg|jpeg|png|xpm|gif|xbm|bmp)$/)
        return MAGENTA
    # "vmw" is kept from the original list; "wmv" is accepted as well.
    if (u ~ /\.(mpg|mpeg|avi|vmw|wmv|divx?|mov|rm)$/)
        return BMAGENTA
    # NOTE(review): the next two patterns and the final script pattern are
    # unanchored and end in an optional character, so e.g. "\.pl?" matches
    # any ".p". They are kept as they were; confirm the intended matching
    # before tightening them.
    if (u ~ /\.php?|\.phtml?/)
        return BWHITE
    if (u ~ /\.shtml?/)
        return CYAN
    # The URL is lowercased above, so ".Z" archives are matched as "z";
    # "arj" is a proper alternative here.
    if (u ~ /\.(gz|bz|bz2|rpm|deb|tgz|pkg|zip|shar|cpio|z|lha|arj|rar)$/)
        return RED
    if (u ~ /\.cgi?|\.pl?|\.py?/)
        return GREEN

    return ""
}


# Private helper: print the requested URL in the color chosen by
# lf_url_color(), or unchanged when no color applies.
function lf_print_url (url,    color)
{
    color = lf_url_color(url)

    if (color != "")
        printf("%s ", setcolor(color, url))
    else
        printf("%s ", url)
}


# Private helper: print a virtual host or IP field.
#   text          - the field to print
#   highlighted   - true when this run of equal values gets the color
#   color         - color used for a highlighted field
#   inverse_color - color used for a highlighted field matching -i
#   invertible    - true when the -i pattern applies to this field
# A field matching INVERT_MATCHED (case-insensitively) is shown inverted;
# when it is not highlighted the inverted default color is used.
function lf_print_host (text, highlighted, color, inverse_color, invertible)
{
    if (invertible && INVERT_MATCHED != "" && match(tolower(text), tolower(INVERT_MATCHED)))
        printf("%s ", setcolor(highlighted ? inverse_color : IDEFAULT, text))
    else if (highlighted)
        printf("%s ", setcolor(color, text))
    else
        printf("%s ", text)
}


# Colorize and print the current input record.
#
# format is "common" (virtual host in $1, client IP in $2) or "combined"
# (client IP in $1); any other value prints nothing. When SHOW_MATCHED is
# set, records whose first field does not match it (case-insensitively) are
# skipped.
#
# Consecutive records with the same virtual host / IP form a run, and every
# other run is colored. VHOST/VSTATE and IP/IPSTATE remember the previous
# value and the current run parity between records.
#
# With REFERRERS set to "1" two fields are printed after the status code,
# otherwise only one.
function log_format (format)
{
    if (format != "common" && format != "combined")
        return

    if (SHOW_MATCHED != "" && !match(tolower($1), tolower(SHOW_MATCHED)))
        return

    if (format == "common") {
        # Virtual host: a new value starts a new run.
        if (VHOST != $1) {
            VSTATE++
            VHOST = $1
        }
        # Fold odd counts back to 1 so the state stays within 0..2.
        if (VSTATE % 2 == 1)
            VSTATE = 1
        lf_print_host($1, VSTATE == 1, WHITE, IWHITE, 1)

        # Client IP: same alternation, but never inverted in this format.
        if (IP != $2) {
            IPSTATE++
            IP = $2
        }
        if (IPSTATE % 2 == 1)
            IPSTATE = 1
        lf_print_host($2, IPSTATE == 1, YELLOW, "", 0)

        printf ("%s %s %s %s ", $3, $4, $5, $6)
        lf_print_method($7)
        lf_print_url($8)
        printf ("%s ", $9)
        lf_print_status($10)

        if (REFERRERS == "1")
            printf ("%s %s\n", $11, $12)
        else
            printf ("%s\n", $11)

        return
    }

    # "combined": the client IP is the first field.
    if (IP != $1) {
        IPSTATE++
        IP = $1
    }
    if (IPSTATE % 2 == 1)
        IPSTATE = 1
    lf_print_host($1, IPSTATE == 1, YELLOW, IYELLOW, 1)

    printf ("%s %s %s %s ", $2, $3, $4, $5)
    lf_print_method($6)
    lf_print_url($7)
    printf ("%s ", $8)
    lf_print_status($9)

    if (REFERRERS == "1")
        printf ("%s %s\n", $10, $11)
    else
        printf ("%s\n", $10)
}


# Main rule: colorize every input record.
#
# A format given with -l (LOGFORMAT) always wins. Otherwise the layout is
# guessed from the first field: log_format("combined") reads the client IP
# from $1, while log_format("common") expects a virtual host in $1 and the
# IP in $2. A first field that is a dotted-quad IPv4 address therefore
# selects "combined" and anything else selects "common".
#
# The address test is anchored at both ends, so a host name that merely
# contains a dotted quad is not mistaken for an IP, and it is written
# without interval expressions ({1,3}) so that detection does not depend on
# the --re-interval awk option.
{
if (LOGFORMAT == "common" || LOGFORMAT == "combined")
   log_format(LOGFORMAT)
else if ($1 ~ /^[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?$/)
   log_format("combined")
else
   log_format("common")
}

