/* analog.c 0.9beta3 */ /* Please read the README, or http://www.statslab.cam.ac.uk/~sret1/analog/ */ /* Update history: /* to 0.8: initial program, just default options */ /* 0.89: added commandline args, and many new options */ /* 0.89beta2: solved problem with over-long log entries */ /* 0.89beta3: Hash tables introduced for all categories except domains */ /* Now count number of distinct hosts */ /* Included all successes (searches are stripped down to ?) */ /* 0.89beta4: Understands searches even when the URL gets very long */ /* 0.9beta: Introduced HOSTURL, and subdomain analysis. */ /* Fixed bug that required logfile to be in chronological order. */ /* Translated it into ANSI C (from K & R) */ /* 0.9beta2: Wrote my own scanf; made whole program 30% faster! */ /* Included 304's in successful, not redirected requests. */ /* 0.9beta3: Domains implemented as non-clashing hash table. */ /* Calculated reqs/bytes per day correctly for short periods. */ #define VERSION "0.9beta3" /* the version number of this program */ /* #define PROF /* include this line for home-made profiling */ #include #include #include #include #include #include #define TRUE (1) #define FALSE (0) #define ON (TRUE) #define OFF (FALSE) #define OK (0) #define ERR (-1) #define BYREQUESTS (0) /* ways of sorting */ #define BYBYTES (1) #define ALPHABETICAL (2) #define ALL (2) /* see PAGELINKS in analhead.h */ #define COMMON (2) /* three types of input lines */ #define NCSAOLD (1) #define CORRUPT (0) typedef int flag; #define MAX(a,b) (((a)>(b))?(a):(b)) #define MIN(a,b) (((a)<(b))?(a):(b)) #define FIRSTMONTH (23891) /* earliest poss month = 12 * year + month */ /* Nov 1990; very conservative */ #define DOMHASHSIZE (1354) /* = 2 * 26^2 + 2 by description of domain algorithm */ #include "analhead.h" /* (Global) commandline variables */ /* Defaults for all these can be set in analhead.h; they are explained there */ char logfile[MAXSTRINGLENGTH]; char domainsfile[MAXSTRINGLENGTH]; flag mq, dq, hq, oq, iq, rq, sq, q7; /* whether we want each type of report */ int monthlyunit, dailyunit, hourlyunit; /* the size of the mark in the graphical displays */ int domfloor, domsortby, dirfloor, dirsortby; int dirlevel, reqfloor, reqsortby, pagewidth; char markchar; char hostname[MAXSTRINGLENGTH], hosturl[MAXSTRINGLENGTH]; int kq; /* default PAGELINKS */ void commandline(int argc, char **argv) { int i; /* First put in the default values */ strcpy(logfile, LOGFILE); strcpy(domainsfile, DOMAINSFILE); mq = MONTHLY; dq = DAILY; hq = HOURLY; oq = DOMAIN; iq = DIRECTORY; rq = REQUEST; sq = COUNTHOSTS; q7 = LASTSEVEN; monthlyunit = MONTHLYUNIT; hourlyunit = HOURLYUNIT; dailyunit = DAILYUNIT; domsortby = DOMSORTBY; dirsortby = DIRSORTBY; reqsortby = REQSORTBY; if (domsortby == BYBYTES) domfloor = MIN_DOM_BYTES; else domfloor = MIN_DOM_REQS; if (reqsortby == BYBYTES) reqfloor = MIN_URL_BYTES; else reqfloor = MIN_URL_REQS; if (dirsortby == BYBYTES) dirfloor = MIN_DIR_BYTES; else dirfloor = MIN_DIR_REQS; dirlevel = DIRLEVEL; pagewidth = PAGEWIDTH; markchar = MARKCHAR; strcpy(hostname, HOSTNAME); strcpy(hosturl, HOSTURL); kq = PAGELINKS; /* now read the arguments */ for (i = 1; i < argc; i++) { if (argv[i][0] != '+' && argv[i][0] != '-') strcpy(logfile, argv[i]); else switch (argv[i][1]) { case '\0': /* read stdin */ strcpy(logfile, "stdin"); break; case '7': /* stats for last 7 days */ if (argv[i][0] == '-') q7 = OFF; else q7 = ON; break; case 'c': /* markchar */ markchar = argv[i][2]; break; case 'd': /* daily summary */ if (argv[i][0] == '-') dq = OFF; else { dq = ON; if (argv[i][2] != '\0') dailyunit = atoi(argv[i] + 2); } break; case 'f': /* domains file */ strcpy(domainsfile, argv[i] + 2); break; case 'h': /* hourly summary */ if (argv[i][0] == '-') hq = OFF; else { hq = ON; if (argv[i][2] != '\0') hourlyunit = atoi(argv[i] + 2); } break; case 'i': /* directory report */ if (argv[i][0] == '-') iq = OFF; else { iq = ON; switch (argv[i][2]) { case 'a': dirsortby = ALPHABETICAL; if (argv[i][3] == '\0') dirfloor = MIN_DIR_REQS; else dirfloor = atoi(argv[i] + 3); break; case 'b': dirsortby = BYBYTES; if (argv[i][3] == '\0') dirfloor = MIN_DIR_BYTES; else dirfloor = atoi(argv[i] + 3); break; case 'r': dirsortby = BYREQUESTS; if (argv[i][3] == '\0') dirfloor = MIN_DIR_REQS; else dirfloor = atoi(argv[i] + 3); break; default: if (argv[i][2] != '\0') dirfloor = atoi(argv[i] + 2); } } break; case 'k': /* link to pages in req. report? */ if (argv[i][0] == '-') kq = OFF; else { kq = ON; if (argv[i][2] == 'k') kq = ALL; } break; case 'l': /* 'level' of dir report */ dirlevel = atoi(argv[i] + 2); break; case 'm': /* monthly report */ if (argv[i][0] == '-') mq = OFF; else { mq = ON; if (argv[i][2] == '\0') monthlyunit = atoi(argv[i] + 2); } break; case 'n': /* hostname */ strcpy(hostname, argv[i] + 2); break; case 'o': /* domain report */ if (argv[i][0] == '-') oq = OFF; else { oq = ON; switch (argv[i][2]) { case 'a': domsortby = ALPHABETICAL; if (argv[i][3] == '\0') domfloor = MIN_DOM_REQS; else domfloor = atoi(argv[i] + 3); break; case 'b': domsortby = BYBYTES; if (argv[i][3] == '\0') domfloor = MIN_DOM_BYTES; else domfloor = atoi(argv[i] + 3); break; case 'r': domsortby = BYREQUESTS; if (argv[i][3] == '\0') domfloor = MIN_DOM_REQS; else domfloor = atoi(argv[i] + 3); break; default: if (argv[i][2] != '\0') domfloor = atoi(argv[i] + 2); } } break; case 'r': /* request report */ if (argv[i][0] == '-') rq = OFF; else { rq = ON; switch (argv[i][2]) { case 'a': reqsortby = ALPHABETICAL; if (argv[i][3] == '\0') reqfloor = MIN_URL_REQS; else reqfloor = atoi(argv[i] + 3); break; case 'b': reqsortby = BYBYTES; if (argv[i][3] == '\0') reqfloor = MIN_URL_BYTES; else reqfloor = atoi(argv[i] + 3); break; case 'r': reqsortby = BYREQUESTS; if (argv[i][3] == '\0') reqfloor = MIN_URL_REQS; else reqfloor = atoi(argv[i] + 3); break; default: if (argv[i][2] != '\0') reqfloor = atoi(argv[i] + 2); } } break; case 's': /* count hosts? */ if (argv[i][0] == '-') sq = OFF; else sq = ON; break; case 'u': /* host URL */ strcpy(hosturl, argv[i] + 2); break; case 'w': /* pagewidth */ pagewidth = atoi(argv[i] + 2); if (pagewidth < MINPAGEWIDTH || pagewidth > MAXPAGEWIDTH) { fprintf(stderr, "Warning: at option %s, page width should be between %d and %d\n", MINPAGEWIDTH, MAXPAGEWIDTH); fprintf(stderr, "Resetting to default value of %d\n", PAGEWIDTH); pagewidth = PAGEWIDTH; } break; default: fprintf(stderr, "Warning: Ignoring unknown option %s: see README for correct usage\n", argv[i]); fprintf(stderr, "or go to http://www.statslab.cam.ac.uk/~sret1/analog/\n"); } } } int strtomonth(char month[3]) /* convert 3 letter month abbrev. to int */ { int monthno; switch (month[0]) { case 'A': switch (month[1]) { case 'p': monthno = 4; break; case 'u': monthno = 8; break; } break; case 'D': monthno = 12; break; case 'F': monthno = 2; break; case 'J': switch (month[1]) { case 'a': monthno = 1; break; case 'u': switch (month[2]) { case 'l': monthno = 7; break; case 'n': monthno = 6; break; } break; } break; case 'M': switch (month[2]) { case 'r': monthno = 3; break; case 'y': monthno = 5; break; } break; case 'N': monthno = 11; break; case 'O': monthno = 10; break; case 'S': monthno = 9; break; } return(monthno); } int dateoffset[13] = {0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}; int dayofdate(int date, int monthno, int year) /* day of week of given date */ { int x; x = dateoffset[monthno] + date + year + (year / 4) + 5; /* every fourth year until 2099 is a leap year */ return(x % 7); } int minsbetween(int date1, int monthno1, int year1, int hr1, int min1, int date2, int monthno2, int year2, int hr2, int min2) { int x, y; x = dateoffset[monthno1] + date1 + year1 * 365 + (year1 / 4); y = dateoffset[monthno2] + date2 + year2 * 365 + (year2 / 4); return((y - x) * 1440 + (hr2 - hr1) * 60 + (min2 - min1)); } void int3printf(int x) /* print +ve integer with spaces every 3 digits */ { int i = 1; while (x / 1000 >= i) /* i * 1000 might overflow */ i *= 1000; /* find how big x is, so we know where to start */ printf("%d", (x / i) % 1000); /* now run down again, printing each clump */ for ( i /= 1000; i >= 1; i /= 1000) printf(" %03d", (x / i) % 1000); } void double3printf(double x) /* the same, only with +ve doubles */ { double i = 1; while (x / 1000 >= i) i *= 1000; printf("%d", ((int)(x / i)) % 1000); for ( i /= 1000; i >= 1; i /= 1000) printf(" %03d", ((int)(x / i)) % 1000); } /* Define functions to replace scanf, which is very slow */ int sscanf_common(char *inputline, char hostn[MAXSTRINGLENGTH], int *date, char month[4], int *year, int *hr, int *min, char filename[MAXSTRINGLENGTH], int *code, char bytestr[16]) { char *cin = inputline; /* the character we are reading */ char *cout; /* where we are putting it */ int i; /* read in hostname */ i = 0; for (cout = hostn; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1; cin++) { *cout = *cin; cout++; i++; } if (*cin != ' ') return(0); *cout = '\0'; /* scan until next '[' */ for (cin++; *cin != '[' && *cin != '\0'; cin++) ; if (*cin == '\0') return(1); /* read in date */ cin++; if (!isdigit(*cin)) return(1); else *date = 10 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(1); else *date += (*cin - '0'); /* read in month */ cin++; if (*cin != '/') return(2); cin++; cout = month; for (i = 0; i < 3 && *cin != '\0'; i++) { *cout = *cin; cout++; cin++; } if (*cin == '\0') return(2); *cout = '\0'; /* read in year */ if (*cin != '/') return(3); cin++; if (!isdigit(*cin)) return(3); else *year = 1000 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(3); else *year += 100 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(3); else *year += 10 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(3); else *year += (*cin - '0'); /* read in hour */ cin++; if (*cin != ':') return(4); cin++; if (!isdigit(*cin)) return(4); else *hr = 10 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(4); else *hr += (*cin - '0'); /* read in minute */ cin++; if (*cin != ':') return(5); cin++; if (!isdigit(*cin)) return(5); else *min = 10 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(5); else *min += (*cin - '0'); /* ignore second & timezone; so scan to next '"' */ for (cin++; *cin != '"' && *cin != '\0'; cin++) ; if (*cin == '\0') return(6); /* ignore method; so read to next ' ' */ for (cin++; *cin != ' ' && *cin != '\0'; cin++) ; if (*cin == '\0') return(6); /* read in filename */ cin++; i = 0; for (cout = filename; *cin != ' ' && *cin != '\0' && *cin != '"' && *cin != '?' && i < MAXSTRINGLENGTH - 1; cin++) { *cout = *cin; cout++; i++; } if (*cin != ' ' && *cin != '"' && *cin != '?') return(6); *cout = '\0'; /* scan to next " */ for ( ; *cin != '"' && *cin != '\0' ; cin++) ; if (*cin == '\0') return(7); /* read in return code; always 3 digits */ cin++; if (*cin != ' ') return(7); cin++; if (!isdigit(*cin)) return(7); else *code = 100 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(7); else *code += 10 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(7); else *code += (*cin - '0'); /* finally, read in bytestr */ cin++; if (*cin != ' ') return (8); cin++; i = 0; for (cout = bytestr; *cin != ' ' && *cin != '\n' && i < 16; cin++) { *cout = *cin; cout++; } *cout = '\0'; return(9); } int sscanf_ncsaold(char *inputline, char hostn[MAXSTRINGLENGTH], char month[4], int *date, int *hr, int *min, int *year, char filename[MAXSTRINGLENGTH]) { char *cin = inputline; /* the character we are reading */ char *cout; /* where we are putting it */ int i; /* read in hostname */ i = 0; for (cout = hostn; *cin != ' ' && *cin != '\0' && i < MAXSTRINGLENGTH - 1; cin++) { *cout = *cin; cout++; i++; } if (*cin != ' ') return(0); *cout = '\0'; /* scan until next '[' */ for (cin++; *cin != '[' && *cin != '\0'; cin++) ; if (*cin == '\0') return(1); /* ignore day of week, so scan until next ' ' */ for (cin++; *cin != ' ' && *cin != '\0'; cin++) ; if (*cin == '\0') return(1); /* read in month */ cin++; cout = month; for (i = 0; i < 3 && *cin != '\0'; i++) { *cout = *cin; cout++; cin++; } if (*cin == '\0') return(2); *cout = '\0'; /* read in date */ if (*cin != ' ') return(3); cin++; if (!isdigit(*cin) && *cin != ' ') return(3); else if (*cin != ' ') *date = 10 * (*cin - '0'); else *date = 0; cin++; if (!isdigit(*cin)) return(3); else *date += (*cin - '0'); /* read in hour */ cin++; if (*cin != ' ') return(4); cin++; if (!isdigit(*cin)) return(4); else *hr = 10 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(4); else *hr += (*cin - '0'); /* read in minute */ cin++; if (*cin != ':') return(5); cin++; if (!isdigit(*cin)) return(5); else *min = 10 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(5); else *min += (*cin - '0'); /* ignore second; skip to year and read it */ cin++; cin++; cin++; cin++; cin++; if (!isdigit(*cin)) return(6); else *year = 1000 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(6); else *year += 100 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(6); else *year += 10 * (*cin - '0'); cin++; if (!isdigit(*cin)) return(6); else *year += (*cin - '0'); /* ignore method, so skip to second space */ for (cin++; *cin != ' ' && *cin != '\0'; cin++) ; if (*cin == '\0') return(7); for (cin++; *cin != ' ' && *cin != '\0'; cin++) ; if (*cin == '\0') return(7); /* finally, read in the filename */ cin++; i = 0; for (cout = filename; *cin != ' ' && *cin != '\n' && *cin != '?' && i < MAXSTRINGLENGTH - 1; cin++) { *cout = *cin; cout++; i++; } if (i = MAXSTRINGLENGTH - 1) return(7); *cout = '\0'; return (8); } int main(int argc, char **argv) { FILE *lf, *df; /* logfile, domains file */ int rc; /* return code */ char inputline[MAXLINELENGTH]; /* a particular input line */ int linetype; /* COMMON, NCSAOLD or CORRUPT */ char hostn[MAXSTRINGLENGTH]; int day, date, monthno, year, monthcode, hr, min; int firstdate, firstmonthno, firstyear, firstmonthcode, firsthr, firstmin; int lastdate, lastmonthno, lastyear, lastmonthcode, lasthr, lastmin; int timecode; /* monthcode * 60 * 24 * 31 + date * 60 * 24 + hr * 60 + min */ int firsttimecode = 1000000000, lasttimecode = 0; /* first and last stats analysed */ int totalmins; /* between first and last entries analysed */ int olddate, oldmonthno, oldyear, oldhr, oldmin, oldsec, oldmonthcode; int oldtimecode; /* a week before present */ char oldmonth[4]; char dayname[7][4] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; char monthname[13][4] = {"", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; char month[4]; char filename[MAXSTRINGLENGTH]; int code; double bytes; /* long is not big enough; double has more sig. figs, and copes with overflow automatically. */ char bytestr[16]; int dirsufflength; /* the length of DIRSUFFIX */ int bq = ON; /* Count bytes? On until we find a line in old format. */ int fieldwidth; /* width we require to print certain integers in */ int monthlyreq[MAXMONTHS]; /* # requests in each month */ int maxmonthlyreq = 0; /* the maximum of those */ int dailyreq[7]; int maxdailyreq = 0; int hourlyreq[24]; int maxhourlyreq = 0; int total_fail_reqs = 0; /* the number of failed requests so far (400s, 500s) */ int total_fail_reqs7 = 0; /* in last 7 days */ int total_succ_reqs = 0; /* the number of successful requests so far (200s) */ int total_succ_reqs7 = 0; int total_other_reqs = 0; /* redirects (300s) */ int total_other_reqs7 = 0; double total_bytes = 0; double total_bytes7 = 0; int corrupt_lines = 0; /* the number of corrupt lines in the logfile */ /* (These tend to be URLs with spaces or quotes in */ int no_urls = 0; /* the number of distinct URLs found so far */ int no_urls7 = 0; /* the number used in the last 7 days */ int url_max_reqs = 0; /* the maximum number of requests for any URL */ struct url { /* define a structure containing information about a URL ... */ char name[MAXSTRINGLENGTH]; /* the name of the URL */ int reqs; /* the number of requests it has received */ double bytes; /* the number of bytes transferred due to it */ flag last7; /* whether it has been used in the last 7 days */ struct url *next; /* the next url in the list */ } *urlhead[URLHASHSIZE], *urlsorthead, *urlp, *urlp2, *urllastp, *urlnextp; /* ... and set up some pointers to such structures */ int no_dirs = 0; /* directories ditto */ int dir_max_reqs = 0; struct dir { char name[MAXSTRINGLENGTH]; int reqs; double bytes; struct dir *next; } *dirhead[DIRHASHSIZE], *dirsorthead, *dirp, *dirp2, *dirlastp, *dirnextp; int no_hosts = 0; /* hosts ditto */ int no_hosts7 = 0; /* the number of all hosts in the last 7 days */ int no_new_hosts7 = 0; /* the number of new hosts in the last 7 days */ struct host { char name[MAXSTRINGLENGTH]; flag last7; struct host *next; } *hosthead[HOSTHASHSIZE], *hostp; struct domain { /* and domains */ char id[256]; /* can be as long as host for domains like 'statslab.cam.ac.uk' */ char name[256]; /* the geographical location of the domain */ int reqs; double bytes; int nexti; /* the index of the next domain alphabetically */ struct domain *next; /* the next subdomain of the current domain */ } *domainhead[DOMHASHSIZE], *domp, *domp2, *domlastp; int firstdom, domnextj; /* for sorting */ int dom_max_reqs = 0; /* the max. of domain_reqs */ char domainname[MAXSTRINGLENGTH]; /* a domain for a particular request */ flag last7q; /* are we now in the last 7 days? */ int magicnumber; /* the magic hash number of a file, host etc. */ int onlist; /* which list we are on, while sorting */ double bytepc; int bytepc1, bytepc2; /* the % of bytes for a particular URL */ struct timeval starttime, stoptime; long oldtime; struct timezone tzp; char starttimestr[26]; char oldtimestr[26]; int i, j, tempint; /* useful bits and bobs */ double tempdouble; flag tempflag; char *tempp; char tempstr[MAXSTRINGLENGTH], tempstr2[MAXSTRINGLENGTH]; #ifdef PROF long timein = 0, timeout = 0, timescan1 = 0, timescan2 = 0; long timescan = 0, timehost = 0, timesort = 0, timedomsort = 0; long timegets = 0, timedom = 0, timedir = 0, timereq = 0, timeother = 0, timetot = 0; struct timeval lasttime, thistime; #endif /* Initialisation */ gettimeofday(&starttime, &tzp); strcpy(starttimestr, ctime(&starttime.tv_sec)); commandline(argc, argv); if (q7) { oldtime = starttime.tv_sec - 604800; /* seconds in a week */ strcpy(oldtimestr, ctime(&oldtime)); oldyear = (oldtimestr[23] - '0') + (oldtimestr[22] - '0') * 10 + (oldtimestr[21] - '0') * 100 + (oldtimestr[20] - '0') * 1000; oldmin = (oldtimestr[15] - '0') + (oldtimestr[14] - '0') * 10; oldhr = (oldtimestr[12] - '0') + (oldtimestr[11] - '0') * 10; olddate = (oldtimestr[9] - '0'); if (oldtimestr[8] != ' ') olddate += (oldtimestr[8] - '0') * 10; oldtimestr[7] = '\0'; strcpy(oldmonth, oldtimestr + 4); oldmonthno = strtomonth(oldmonth); oldmonthcode = 12 * oldyear + oldmonthno - FIRSTMONTH; oldtimecode = oldmonthcode * 44640 + olddate * 1440 + oldhr * 60 + oldmin; } dirsufflength = strlen(DIRSUFFIX); if (rq) { for (i = 0; i < URLHASHSIZE; i++) { urlhead[i] = (struct url *) malloc(sizeof(struct url)); urlhead[i] -> name[0] = '\0'; } } if (iq) { for (i = 0; i < DIRHASHSIZE; i++) { dirhead[i] = (struct dir *) malloc(sizeof(struct dir)); dirhead[i] -> name[0] = '\0'; } } if (sq) { for (i = 0; i < HOSTHASHSIZE; i++) { hosthead[i] = (struct host *) malloc(sizeof(struct host)); hosthead[i] -> name[0] = '\0'; } } if (oq) { for (i = 0; i < DOMHASHSIZE; i++) { domainhead[i] = (struct domain *) malloc(sizeof(struct domain)); domainhead[i] -> name[0] = '\0'; } df = fopen(domainsfile, "r"); /* calculate all domains */ if (df == NULL) { fprintf(stderr, "Warning: Failed to open domains file %s: will not construct domain report\n", domainsfile); oq = OFF; } } /* We put the domains in the following order. aa = 0, ab = 2, ..., ba = 52, ... Domains with more than two letters go in the spaces; co = 134, com = 135, cp = 136. We assume that there are no two long domain names with the same two initial letters. Finally zz = 1350, zzspam = 1351, Unknown = 1352, Numerical = 1353. Each domain contains a 'nexti' element to show which is the next domain that occurs in the domains file. */ if (oq) { domp = domainhead[DOMHASHSIZE - 2]; strcpy(domp -> id, "*UNK"); strcpy(domp -> name, "unknown"); domp -> reqs = 0; domp -> bytes = 0; domp -> next = (struct domain *) malloc(sizeof(struct domain)); domp -> next -> name[0] = '\0'; domp = domainhead[DOMHASHSIZE - 1]; strcpy(domp -> id, "*NUM"); strcpy(domp -> name, "numerical hosts"); domp -> reqs = 0; domp -> bytes = 0; domp -> next = (struct domain *) malloc(sizeof(struct domain)); domp -> next -> name[0] = '\0'; domp2 = domp; domp = domp -> next; while ((rc = fscanf(df, "%255s %255[^\n]", tempstr, tempstr2)) != EOF) { if (rc == 2) { if ((!isdigit(tempstr[0])) && strchr(tempstr, '.') == NULL) { /* new domain */ magicnumber = (tempstr[0] - 'a') * 52 + (tempstr[1] - 'a') * 2 + (tempstr[2] != '\0'); if (magicnumber < 0 || magicnumber > DOMHASHSIZE - 3) fprintf(stderr,"Warning: Ignoring corrupt line in domains file\n"); domp = domainhead[magicnumber]; strcpy(domp -> id, tempstr); strcpy(domp -> name, tempstr2); domp -> reqs = 0; domp -> bytes = 0; domp -> next = (struct domain *) malloc(sizeof(struct domain)); domp -> next -> name[0] = '\0'; domp2 -> nexti = magicnumber; /* domp2 is the last domain we looked at */ domp2 = domp; domp = domp -> next; } else { /* subdomain of last domain */ strcpy(domp -> id, tempstr); strcpy(domp -> name, tempstr2); domp -> reqs = 0; domp -> bytes = 0; domp -> next = (struct domain *) malloc(sizeof(struct domain)); domp -> next -> name[0] = '\0'; domp = domp -> next; } } else { /* rc != 2 */ fprintf(stderr,"Warning: Ignoring corrupt line in domains file\n"); } } domp2 -> nexti = -1; /* marker; last domain has no subsequent one */ fclose(df); } /* end if (oq) */ if (strcmp(logfile, "stdin") == 0) lf = stdin; else { lf = fopen(logfile, "r"); if (lf == NULL) { fprintf(stderr, "Error: Failed to open log file %s: exiting\n", logfile); exit(ERR); } } #ifdef PROF gettimeofday(&thistime, &tzp); timein += thistime.tv_usec - starttime.tv_usec + (thistime.tv_sec - starttime.tv_sec) * 1000000; lasttime = thistime; #endif /* Now start scanning */ while(fgets(inputline, MAXLINELENGTH, lf) != NULL) { #ifdef PROF gettimeofday(&thistime, &tzp); timegets += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif linetype = CORRUPT; /* paranoia :) */ if (sscanf_common(inputline, hostn, &date, month, &year, &hr, &min, filename, &code, bytestr) == 9) { linetype = COMMON; #ifdef PROF gettimeofday(&thistime, &tzp); timescan1 += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif } else if (sscanf_ncsaold(inputline, hostn, month, &date, &hr, &min, &year, filename) == 7) { linetype = NCSAOLD; if (bq) { bq = OFF; if ((domsortby == BYBYTES && oq) || (reqsortby == BYBYTES && rq) || (dirsortby == BYBYTES && iq)) { fprintf(stderr, "Warning: Logfile contains old-style lines with no bytes data:\n"); fprintf(stderr, " Sorting will be by requests, not bytes\n"); if (domsortby == BYBYTES && oq) { domsortby = BYREQUESTS; domfloor = MIN_DOM_REQS; } if (reqsortby == BYBYTES && rq) { reqsortby = BYREQUESTS; reqfloor = MIN_URL_REQS; } if (dirsortby == BYBYTES && iq) { dirsortby = BYREQUESTS; dirfloor = MIN_DIR_REQS; } } } } #ifdef PROF gettimeofday(&thistime, &tzp); timescan += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif if (linetype != CORRUPT) { monthno = strtomonth(month); monthcode = 12 * year + monthno - FIRSTMONTH; timecode = monthcode * 44640 + date * 1440 + hr * 60 + min; firsttimecode = MIN(firsttimecode, timecode); lasttimecode = MAX(lasttimecode, timecode); /* Are we in the last 7 days? Check this every time in case */ /* logfile is not in chronological order */ if (q7) { last7q = FALSE; if (timecode > oldtimecode) last7q = TRUE; } bytes = atof(bytestr); total_bytes += bytes; if (last7q) total_bytes7 += bytes; if (code <= 299 || code == 304) { /* successes */ day = dayofdate(date, monthno , year); ++total_succ_reqs; if (last7q) ++total_succ_reqs7; /* date cataloguing */ if (monthcode >= 0 && monthcode < MAXMONTHS) ++monthlyreq[monthcode]; /* ignore months earlier than first month we saw */ ++dailyreq[day]; /* these are so little work, it's easier just to */ ++hourlyreq[hr]; /* do them rather than to check mq, dq and hq */ #ifdef PROF gettimeofday(&thistime, &tzp); timeother += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif /* Now for the request report */ if (rq || iq) { if (strcmp(filename + MAX(strlen(filename) - dirsufflength, 0), DIRSUFFIX) == 0) /* if it ends with the DIRSUFFIX (i.e. index.html), strip it */ filename[strlen(filename) - dirsufflength] = '\0'; if (filename[1] == '%' && filename[2] == '7' && (filename[3] == 'E' || filename[3] == 'e')) { filename[1] = '~'; /* change %7E and %7e to ~ */ strcpy(filename + 2, filename + 4); } } if (rq) { /* First calculate filename's "magic number" */ magicnumber = 0; for (i = 0; filename[i] != '\0'; i++) { magicnumber += magicnumber + filename[i]; while (magicnumber >= URLHASHSIZE) magicnumber -= URLHASHSIZE; } /* now look through the magicnumber'th list for that URL */ tempflag = TRUE; /* tempflag means 'still need to look' */ urlp = (urlhead[magicnumber]); while (urlp -> name[0] != '\0' && tempflag) { if (strcmp(urlp -> name, filename) == 0) { /* then done */ (urlp -> reqs)++; urlp -> bytes += bytes; if (last7q && !(urlp -> last7)) { no_urls7++; urlp -> last7 = TRUE; } tempflag = FALSE; } else { /* look at the next one */ urlp = urlp -> next; } } if (tempflag) { /* reached the end of the list without success; new URL */ no_urls++; strcpy(urlp -> name, filename); urlp -> reqs = 1; urlp -> bytes = bytes; if (last7q) { no_urls7++; urlp -> last7 = TRUE; } else urlp -> last7 = FALSE; urlp -> next = (struct url *) malloc(sizeof(struct url)); urlp -> next -> name[0] = '\0'; } } /* end if (rq) */ #ifdef PROF gettimeofday(&thistime, &tzp); timereq += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif /* Now for the directory report. This is just the same as the request report, but with the filename truncated. */ if (iq) { i = 0; for (j = 0; j < dirlevel; j++) { if (filename[i] == '/') i++; for ( ; filename[i] != '/' && filename[i] != '\0'; i++) ; /* run through to level'th slash, if any */ } if (filename[i] == '\0') /* not j levels; run back */ for ( i-- ; filename[i] != '/'; i--) ; /* Now filename[i] == '/' */ filename[i + 1] = '\0'; /* Terminate it there */ if (strcmp(filename, "/") == 0) strcpy(filename, "root directory"); /* Now look through the directories so far, as above. */ magicnumber = 0; for (i = 0; filename[i] != '\0'; i++) { magicnumber += magicnumber + filename[i]; while (magicnumber >= DIRHASHSIZE) magicnumber -= DIRHASHSIZE; } tempflag = TRUE; dirp = (dirhead[magicnumber]); while (dirp -> name[0] != '\0' && tempflag) { if (strcmp(dirp -> name, filename) == 0) { (dirp -> reqs)++; dirp -> bytes += bytes; tempflag = FALSE; } else { dirp = dirp -> next; } } if (tempflag) { no_dirs++; strcpy(dirp -> name, filename); dirp -> reqs = 1; dirp -> bytes = bytes; dirp -> next = (struct dir *) malloc(sizeof(struct dir)); dirp -> next -> name[0] = '\0'; } } /* end if (iq) */ #ifdef PROF gettimeofday(&thistime, &tzp); timedir += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif /* Now for the hostname count. Again, just the same as above, but we don't measure bytes and requests for each host. */ for (i = strlen(hostn) - 1; i >= 0; i--) hostn[i] = tolower(hostn[i]); if (sq) { magicnumber = 0; for (i = 0; hostn[i] != '\0'; i++) { magicnumber += magicnumber + hostn[i]; while (magicnumber >= HOSTHASHSIZE) magicnumber -= HOSTHASHSIZE; } tempflag = TRUE; hostp = (hosthead[magicnumber]); while (hostp -> name[0] != '\0' && tempflag) { if (strcmp(hostp -> name, hostn) == 0) { if (last7q && !(hostp -> last7)) { no_hosts7++; hostp -> last7 = TRUE; } tempflag = FALSE; } else { hostp = hostp -> next; } } if (tempflag) { no_hosts++; strcpy(hostp -> name, hostn); if (last7q) { no_hosts7++; no_new_hosts7++; hostp -> last7 = TRUE; } else hostp -> last7 = FALSE; hostp -> next = (struct host *) malloc(sizeof(struct host)); hostp -> next -> name[0] = '\0'; } } /* end if (sq) */ #ifdef PROF gettimeofday(&thistime, &tzp); timehost += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif /* Now the domain report. This is different because we already know */ /* all domains, so there need be no clashes in the hash table. */ if (oq) { /* first change hostn into a domain */ for (i = strlen(hostn) - 1; hostn[i] != '.' && i > 0; i--) ; /* run back to final .; or initial char if hostn is corrupt */ if (i == 0) { magicnumber = DOMHASHSIZE - 2; /* representing unknown hosts */ } else if (hostn[i + 1] <= '9' && hostn[i + 1] >= '0') { magicnumber = DOMHASHSIZE - 1; /* representing numerical domains */ } else { strcpy(domainname, hostn + i + 1); magicnumber = (domainname[0] - 'a') * 52 + (domainname[1] - 'a') * 2 + (domainname[2] != '\0'); if (magicnumber < 0 || magicnumber > DOMHASHSIZE - 3) magicnumber = DOMHASHSIZE - 2; else if (strcmp(domainhead[magicnumber] -> id, domainname) != 0) magicnumber = DOMHASHSIZE - 2; } (domainhead[magicnumber] -> reqs)++; domainhead[magicnumber] -> bytes += bytes; /* now run through the rest of the list corresponding to that domain, and check each one against the hostn */ domp = domainhead[magicnumber] -> next; while (domp -> name[0] != '\0') { if (magicnumber != DOMHASHSIZE - 1) { strcpy(tempstr, "."); strcat(tempstr, domp -> id); } else if (magicnumber == DOMHASHSIZE - 1) { strcpy(tempstr, domp -> id); strcat(tempstr, "."); } if ((strcmp(domp -> id, hostn) == 0) || (magicnumber != DOMHASHSIZE - 1 && strcmp(tempstr, hostn + MAX(strlen(hostn) - strlen(tempstr), 0)) == 0) || (magicnumber == DOMHASHSIZE - 1 && strncmp(tempstr, hostn, strlen(tempstr)) == 0)) { (domp -> reqs)++; domp -> bytes += bytes; } domp = domp -> next; } } /* end if (oq) */ #ifdef PROF gettimeofday(&thistime, &tzp); timedom += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif } /* end if code <= 299 || code == 304 */ else if (code >= 400) { ++total_fail_reqs; if (last7q) ++total_fail_reqs7; #ifdef PROF gettimeofday(&thistime, &tzp); timeother += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif } else { /* code 300's (not 304): redirects */ ++total_other_reqs; if (last7q) ++total_other_reqs7; #ifdef PROF gettimeofday(&thistime, &tzp); timeother += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif } } /* end if linetype != CORRUPT */ else { /* line is corrupt */ ++corrupt_lines; if (strchr(inputline, '\n') == NULL) /* line corrupt by being too long; */ fscanf(lf, "%*[^\n]"); /* read to end of line */ #ifdef PROF gettimeofday(&thistime, &tzp); timeother += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif } } fclose(lf); /* OUTPUT */ printf("\nWeb Server Statistics for %s\n", hostname); if (hosturl[0] == '-') printf("\n

Web Server Statistics for %s

\n\n", hostname); else { printf("
\n

Web Server Statistics for %s

\n\n", hosturl, hostname); } printf("
\nProgram started at %c%c%c-%c%c-%c%c%c-%c%c%c%c %c%c:%c%c (all times local).\n", starttimestr[0], starttimestr[1], starttimestr[2], starttimestr[8], starttimestr[9], starttimestr[4], starttimestr[5], starttimestr[6], starttimestr[20], starttimestr[21], starttimestr[22], starttimestr[23], starttimestr[11], starttimestr[12], starttimestr[14], starttimestr[15]); if (firsttimecode > oldtimecode) q7 = OFF; if (total_succ_reqs > 0) { firstmonthcode = firsttimecode / 44640; firsttimecode -= firstmonthcode * 44640; firstmonthcode += FIRSTMONTH; firstyear = firstmonthcode / 12; firstmonthno = firstmonthcode % 12; firstdate = firsttimecode / 1440; firsttimecode -= firstdate * 1440; firsthr = firsttimecode / 60; firstmin = firsttimecode % 60; lastmonthcode = lasttimecode / 44640; lasttimecode -= lastmonthcode * 44640; lastmonthcode += FIRSTMONTH; lastyear = lastmonthcode / 12; lastmonthno = lastmonthcode % 12; lastdate = lasttimecode / 1440; lasttimecode -= lastdate * 1440; lasthr = lasttimecode / 60; lastmin = lasttimecode % 60; totalmins = minsbetween(firstdate, firstmonthno, firstyear, firsthr, firstmin, lastdate, lastmonthno, lastyear, lasthr, lastmin) + 1; printf("
Analysed requests from %s-%d-%s-%d %02d:%02d to %s-%d-%s-%d %02d:%02d (%.1f days).\n\n", dayname[dayofdate(firstdate, firstmonthno, firstyear)], firstdate, monthname[firstmonthno], firstyear, firsthr, firstmin, dayname[dayofdate(lastdate, lastmonthno, lastyear)], lastdate, monthname[lastmonthno], lastyear, lasthr, lastmin, (double)totalmins / 1440.0); } if (total_succ_reqs7 + total_fail_reqs7 + total_other_reqs7 == 0) q7 = OFF; /* just total_bytes no good in case (!bq) */ printf("

Total completed requests: "); int3printf(total_succ_reqs); if (q7) { printf(" ("); int3printf(total_succ_reqs7); printf(")"); } printf("\n
Total failed requests: "); int3printf(total_fail_reqs); if (q7) { printf(" ("); int3printf(total_fail_reqs7); printf(")"); } printf("\n
Total redirected requests: "); int3printf(total_other_reqs); if (q7) { printf(" ("); int3printf(total_other_reqs7); printf(")"); } if (totalmins > 30) { printf("\n
Average requests per day: "); double3printf(((double)(total_succ_reqs + total_fail_reqs + total_other_reqs - 1)) * 1440.0 / (totalmins + 0.0)); if (q7) { printf(" ("); int3printf((total_succ_reqs7 + total_fail_reqs7 + total_other_reqs7) / 7); printf(")"); } } if (rq) { /* These data are not collected o/wise */ printf("\n
Number of distinct files requested: "); int3printf(no_urls); if (q7) { printf(" ("); int3printf(no_urls7); printf(")"); } } if (sq) { printf("\n
Number of distinct hosts served: "); int3printf(no_hosts); if (q7) { printf(" ("); int3printf(no_hosts7); printf(")"); printf("\n
Number of new hosts served in last 7 days: "); int3printf(no_new_hosts7); } } printf("\n
Corrupt logfile entries: "); int3printf(corrupt_lines); if (bq) { printf("\n
Total bytes transferred: "); double3printf(total_bytes); if (q7) { printf(" ("); double3printf(total_bytes7); printf(")"); } if (totalmins > 30) { printf("\n
Average bytes transferred per day: "); double3printf((total_bytes * 1440) / (totalmins + 0.0)); if (q7) { printf(" ("); double3printf(total_bytes7 / 7.0); printf(")"); } } } if (q7) printf("\n
(Figures in parentheses refer to the last 7 days)."); /* We need to check here which reports are going to appear (empty reports do not appear even if we requested them). We do this while sorting them. */ if (total_succ_reqs == 0) { mq = OFF; dq = OFF; hq = OFF; oq = OFF; iq = OFF; rq = OFF; } else { if (rq) { rq = OFF; /* turn it off unless we find a big enough one */ urlsorthead = (struct url *) malloc(sizeof(struct url)); /* build up the sort in this list */ urlsorthead -> name[0] = '\0'; /* as marker */ onlist = 0; /* the list we are on */ urlp = urlhead[0]; /* starting at list 0 */ for (i = 0; i < no_urls; i++) { /* run through all the URLs */ if (urlp -> name[0] == '\0') { /* then this member isn't a URL */ urlnextp = urlhead[++onlist]; /* so look at the next list instead */ i--; /* and don't count this one */ } else if ((reqsortby == BYBYTES && (urlp -> bytes / (total_bytes / 10000)) < reqfloor) || (reqsortby != BYBYTES && urlp -> reqs < reqfloor)) { /* we don't want it */ urlnextp = urlp -> next; } else { rq = ON; url_max_reqs = MAX(urlp -> reqs, url_max_reqs); if ((urlp -> bytes > urlsorthead -> bytes && reqsortby == BYBYTES) || (urlp -> reqs > urlsorthead -> reqs && reqsortby == BYREQUESTS) || (strcmp(urlp -> name, urlsorthead -> name) < 0 && reqsortby == ALPHABETICAL) || (urlsorthead -> name[0] == '\0')) { /* if it's before the first item currently on the list, slot it in */ urlnextp = urlp -> next; /* the next one we're going to look at */ urlp -> next = urlsorthead; urlsorthead = urlp; } else { /* otherwise compare with the ones so far */ tempflag = ON; urllastp = urlsorthead; for (urlp2 = urlsorthead -> next; urlp2 -> name[0] != '\0' && tempflag; urlp2 = urlp2 -> next) { if ((urlp -> bytes > urlp2 -> bytes && reqsortby == BYBYTES) || (urlp -> reqs > urlp2 -> reqs && reqsortby == BYREQUESTS) || (strcmp(urlp -> name, urlp2 -> name) < 0 && reqsortby == ALPHABETICAL)) { /* if urlp comes before urlp2 in the chosen ordering, slot it in */ urlnextp = urlp -> next; urlp -> next = urlp2; urllastp -> next = urlp; tempflag = OFF; } urllastp = urlp2; } if (tempflag) { /* we've reached the end of the list; slot it in at */ /* the end */ urlnextp = urlp -> next; urlp -> next = urlp2; urllastp -> next = urlp; } } } urlp = urlnextp; /* so, on to the next one */ } /* end for i */ } /* end if (rq) */ if (iq) { iq = OFF; dirsorthead = (struct dir *) malloc(sizeof(struct dir)); /* build up the sort in this list */ dirsorthead -> name[0] = '\0'; /* as marker */ onlist = 0; /* the list we are on */ dirp = dirhead[0]; /* starting at list 0 */ for (i = 0; i < no_dirs; i++) { /* run through all the DIRs */ if (dirp -> name[0] == '\0') { /* then this member isn't a DIR */ dirnextp = dirhead[++onlist]; /* so look at the next list instead */ i--; /* and don't count this one */ } else if ((dirsortby == BYBYTES && (dirp -> bytes / (total_bytes / 10000)) < dirfloor) || (dirsortby != BYBYTES && dirp -> reqs < dirfloor)) { /* we don't want it */ dirnextp = dirp -> next; } else { iq = ON; dir_max_reqs = MAX(dirp -> reqs, dir_max_reqs); if ((dirp -> bytes > dirsorthead -> bytes && dirsortby == BYBYTES) || (dirp -> reqs > dirsorthead -> reqs && dirsortby == BYREQUESTS) || (strcmp(dirp -> name, dirsorthead -> name) < 0 && dirsortby == ALPHABETICAL) || (dirsorthead -> name[0] == '\0')) { /* if it's before the first item currently on the list, slot it in */ dirnextp = dirp -> next; /* the next one we're going to look at */ dirp -> next = dirsorthead; dirsorthead = dirp; } else { /* otherwise compare with the ones so far */ tempflag = ON; dirlastp = dirsorthead; for (dirp2 = dirsorthead -> next; dirp2 -> name[0] != '\0' && tempflag; dirp2 = dirp2 -> next) { if ((dirp -> bytes > dirp2 -> bytes && dirsortby == BYBYTES) || (dirp -> reqs > dirp2 -> reqs && dirsortby == BYREQUESTS) || (strcmp(dirp -> name, dirp2 -> name) < 0 && dirsortby == ALPHABETICAL)) { /* if dirp comes before dirp2 in the chosen ordering, slot it in */ dirnextp = dirp -> next; dirp -> next = dirp2; dirlastp -> next = dirp; tempflag = OFF; } dirlastp = dirp2; } if (tempflag) { /* we've reached the end of the list; slot it in at */ /* the end */ dirnextp = dirp -> next; dirp -> next = dirp2; dirlastp -> next = dirp; } } } dirp = dirnextp; /* so, on to the next one */ } /* end for i */ } /* end if (iq) */ /* domain check */ if (oq) { oq = OFF; firstdom = DOMHASHSIZE - 2; /* start with unknown domains at front of list */ domainhead[firstdom] -> nexti = -1; j = DOMHASHSIZE - 1; /* the domain we are on; start with numerical domains */ while (j >= 0) { /* run through all the domains */ domp = domainhead[j]; domnextj = domp -> nexti; /* the one we're going to look at after this one */ if (!((domsortby == BYBYTES && domp -> reqs == 0) || (domsortby == BYBYTES && (domp -> bytes / (total_bytes / 10000)) < domfloor) || (domsortby != BYBYTES && domp -> reqs < domfloor))) { /* else we don't want it */ oq = ON; dom_max_reqs = MAX(domp -> reqs, dom_max_reqs); if ((domp -> bytes > domainhead[firstdom] -> bytes && domsortby == BYBYTES) || (domp -> reqs > domainhead[firstdom] -> reqs && domsortby == BYREQUESTS) || (strcmp(domp -> name, domainhead[firstdom] -> name) < 0 && domsortby == ALPHABETICAL)) { /* if it's before the first item currently on the list, slot it in */ domp -> nexti = firstdom; firstdom = j; } else { /* otherwise compare with the ones so far */ tempflag = ON; domlastp = domainhead[firstdom]; for (i = domainhead[firstdom] -> nexti; i >= 0 && tempflag; i = domainhead[i] -> nexti) { if ((domp -> bytes > domainhead[i] -> bytes && domsortby == BYBYTES) || (domp -> reqs > domainhead[i] -> reqs && domsortby == BYREQUESTS) || (strcmp(domp -> name, domainhead[i] -> name) < 0 && domsortby == ALPHABETICAL)) { /* if domp comes before domp2 in the chosen ordering, slot it in */ domp -> nexti = i; domlastp -> nexti = j; tempflag = OFF; } domlastp = domainhead[i]; } if (tempflag) { /* we've reached the end of the list; slot it in at */ /* the end */ domp -> nexti = -1; /* meaning, last item on the list */ domlastp -> nexti = j; } } } j = domnextj; /* so, on to the next one */ } /* end while j >= 0 */ } /* end if (oq) */ } #ifdef PROF gettimeofday(&thistime, &tzp); timesort += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000; lasttime = thistime; #endif if (mq || dq || hq || oq || iq || rq) printf("\n\n

Go To"); if (mq) printf(": Monthly report"); if (dq) printf(": Daily summary"); if (hq) printf(": Hourly summary"); if (oq) printf(": Domain report"); if (iq) printf(": Directory report"); if (rq) printf(": Request report"); printf("\n"); /* Monthly requests */ if (mq) { printf("


\n

Monthly Report

\n"); printf("\n\n

(Go To: Top"); if (dq) printf(": Daily summary"); if (hq) printf(": Hourly summary"); if (oq) printf(": Domain report"); if (iq) printf(": Directory report"); if (rq) printf(": Request report"); printf(")\n"); lastmonthcode -= FIRSTMONTH; firstmonthcode -= FIRSTMONTH; for (i = firstmonthcode; i <= lastmonthcode; i++) if(monthlyreq[i] > maxmonthlyreq) maxmonthlyreq = monthlyreq[i]; tempint = 10000; for (fieldwidth = 5; maxmonthlyreq / tempint >= 10; fieldwidth++) tempint *= 10; /* so f.w. is log_10(maxmonthlyreq), but at least 5 */ if (monthlyunit == 0) { /* (o/wise just use the given amount) */ monthlyunit = maxmonthlyreq * 3 / (2 * (pagewidth - fieldwidth - 12)); /* except we want a 'nice' amount, so ... */ /* (Nice amount is 1, 1.5, 2, 2.5, 3, 4, 5, 6, 8 * 10^n */ j = 0; while (monthlyunit > 30) { monthlyunit /= 10; j++; } if (monthlyunit == 0) /* if maxmonthlyreq < 40 */ monthlyunit = 1; else if (monthlyunit == 7) monthlyunit = 6; else if (monthlyunit == 9) monthlyunit = 8; else if (monthlyunit > 25 && monthlyunit < 30) monthlyunit = 25; else if (monthlyunit > 20 && monthlyunit < 25) monthlyunit = 20; else if (monthlyunit > 15 && monthlyunit < 20) monthlyunit = 15; else if (monthlyunit > 10 && monthlyunit < 15) monthlyunit = 10; for (i = 0; i < j; i++) { monthlyunit *= 10; } } printf("\n

Each %c represents %d request%s.\n\n

\n", markchar,
	   monthlyunit, (monthlyunit == 1)?"":"s, or part thereof", pagewidth);

    printf("   month: ");
    for (i = 5; i < fieldwidth; i++)
      printf(" ");
    printf("#reqs\n");
    printf("--------  ");
    for (i = 1; i <= fieldwidth; i++)
      printf("-");
    printf("\n");

    for(i = firstmonthcode; i <= lastmonthcode; i++) {
      monthno = i + FIRSTMONTH;
      year = monthno / 12;
      monthno = monthno % 12;
      if (monthno == 0) {
	monthno = 12;
	year--;
      }
      printf("%s %d: %*d: ", monthname[monthno], year, fieldwidth, monthlyreq[i]);
      while (monthlyreq[i] > 0) {
	printf("%c", markchar);
	monthlyreq[i] -= monthlyunit;
      }
      printf("\n");
    }

  }

  /* Daily requests */

  if (dq) {

    printf("
\n\n
\n

Daily Summary

\n"); printf("\n\n

(Go To: Top"); if (mq) printf(": Monthly report"); if (hq) printf(": Hourly summary"); if (oq) printf(": Domain report"); if (iq) printf(": Directory report"); if (rq) printf(": Request report"); printf(")\n"); for (i = 0; i <= 6; i++) if(dailyreq[i] > maxdailyreq) maxdailyreq = dailyreq[i]; tempint = 10000; for (fieldwidth = 5; maxdailyreq / tempint >= 10; fieldwidth++) tempint *= 10; /* so f.w. is log_10(maxmonthlyreq), but at least 5 */ if (dailyunit == 0) { dailyunit = maxdailyreq * 3 / (2 * (pagewidth - 7 - fieldwidth)); j = 0; while (dailyunit > 20) { dailyunit /= 10; j++; } if (dailyunit == 0) dailyunit = 1; else if (dailyunit == 7) dailyunit = 6; else if (dailyunit == 9) dailyunit = 8; else if (dailyunit > 25 && dailyunit < 30) dailyunit = 25; else if (dailyunit > 20 && dailyunit < 25) dailyunit = 20; else if (dailyunit > 15 && dailyunit < 20) dailyunit = 15; else if (dailyunit > 10 && dailyunit < 15) dailyunit = 10; for (i = 0; i < j; i++) { dailyunit *= 10; } } printf("\n

Each %c represents %d request%s.\n\n

\n", markchar,
	   dailyunit, (dailyunit == 1)?"":"s, or part thereof", pagewidth);
    printf("day: ");
    for (i = 5; i < fieldwidth; i++)
      printf(" ");
    printf("#reqs\n");
    printf("---  ");
    for(i = 1; i <= fieldwidth; i++)
      printf("-");
    printf("\n");
    
    for(i = 0; i <= 6; i++) {
      printf("%s: %*d: ", dayname[i], fieldwidth, dailyreq[i]);
      while (dailyreq[i] > 0) {
	printf("%c", markchar);
	dailyreq[i] -= dailyunit;
      }
      printf("\n");
    }

  }

  /* Hourly requests */

  if (hq) {
    
    printf("
\n\n
\n

Hourly Summary

\n"); printf("\n\n

(Go To: Top"); if (mq) printf(": Monthly report"); if (dq) printf(": Daily summary"); if (oq) printf(": Domain report"); if (iq) printf(": Directory report"); if (rq) printf(": Request report"); printf(")\n"); for (i = 0; i <= 23; i++) if(hourlyreq[i] > maxhourlyreq) maxhourlyreq = hourlyreq[i]; tempint = 10000; for (fieldwidth = 5; maxhourlyreq / tempint >= 10; fieldwidth++) tempint *= 10; /* so f.w. is log_10(maxhourlyreq), but at least 5 */ if (hourlyunit == 0) { hourlyunit = 3 * maxhourlyreq / ( 2 * (pagewidth - 6 - fieldwidth)); /* except we want a 'nice' amount, so ... */ j = 0; while (hourlyunit > 10) { hourlyunit /= 10; j++; } if (hourlyunit == 0) hourlyunit = 1; else if (hourlyunit == 7) hourlyunit = 6; else if (hourlyunit == 9) hourlyunit = 8; else if (hourlyunit > 25 && hourlyunit < 30) hourlyunit = 25; else if (hourlyunit > 20 && hourlyunit < 25) hourlyunit = 20; else if (hourlyunit > 15 && hourlyunit < 20) hourlyunit = 15; else if (hourlyunit > 10 && hourlyunit < 15) hourlyunit = 10; for (i = 0; i < j; i++) { hourlyunit *= 10; } } printf("\n

Each %c represents %d request%s.\n\n

\n", markchar,
	   hourlyunit, (hourlyunit == 1)?"":"s, or part thereof", pagewidth);
    printf("hr: ");
    for(i = 5; i < fieldwidth; i++)
      printf(" ");
    printf("#reqs\n");
    printf("--  ");
    for (i = 1; i <= fieldwidth; i++)
      printf("-");
    printf("\n");
    
    for(i = 0; i <= 23; i++) {
      printf("%2d: %*d: ", i, fieldwidth, hourlyreq[i]);
      while (hourlyreq[i] > 0) {
	printf("%c", markchar);
	hourlyreq[i] -= hourlyunit;
      }
      printf("\n");
    }
    
  }

  /* Domain report */

  if (oq) {

    printf("
\n\n
\n

Domain Report

\n\n"); printf("\n\n

(Go To: Top"); if (mq) printf(": Monthly report"); if (dq) printf(": Daily summary"); if (hq) printf(": Hourly summary"); if (iq) printf(": Directory report"); if (rq) printf(": Request report"); printf(")\n

");
    
#ifdef PROF
  gettimeofday(&thistime, &tzp);
  timeout += thistime.tv_usec - lasttime.tv_usec + (thistime.tv_sec - lasttime.tv_sec) * 1000000;
  lasttime = thistime;
#endif

    tempint = 10000;
    for (fieldwidth = 5; dom_max_reqs / tempint >= 10; fieldwidth++)
      tempint *= 10;
    
    for (i = 5; i < fieldwidth + 1; i++)
      printf(" ");
    printf("#reqs : ");
    if (bq)
      printf(" %%bytes : ");
    printf("domain\n");
    for (i = 1; i <= fieldwidth + 2; i++)
      printf("-");
    if (bq)
      printf("  --------");
    printf("  ------\n");

    for (i = firstdom; i >= 0; i = domainhead[i] -> nexti) {

      if (!(i == DOMHASHSIZE - 2 && domainhead[i] -> reqs == 0)) {

	if (bq) {
	  bytepc = (domainhead[i] -> bytes) / (total_bytes / 10000);   /* this domain's bytes, as %age, *100 */
	  bytepc1 = ((int)(bytepc)) / 100;    /* whole number of %bytes */
	  bytepc2 = ((int)(bytepc)) % 100;    /* remaining 100ths. */
	}

	printf(" %*d : ", fieldwidth, domainhead[i] -> reqs);
	
	if (bq && (bytepc1 > 0 || bytepc2 > 0))
	  printf("%3d.%02d%% : ", bytepc1, bytepc2);
	else if (bq)
	  printf("        : ");

	if (domainhead[i] -> id[0] == '*')     /* flagged domains, not real domain names */
	  printf("[%s]\n", domainhead[i] -> name);
	else if (domainhead[i] -> name[0] == '?')   /* real domain, but don't print name */
	  printf(".%s\n", domainhead[i] -> id);
	else
	  printf(".%s (%s)\n", domainhead[i] -> id, domainhead[i] -> name);

	/* Now print its subdomains too. */
      
	domp = domainhead[i] -> next;
      
	while (domp -> name[0] != '\0') {
	
	  if (bq) {
	    bytepc = (domp -> bytes) / (total_bytes / 10000);   /* this domain's bytes, as %age, *100 */
	    bytepc1 = ((int)(bytepc)) / 100;    /* whole number of %bytes */
	    bytepc2 = ((int)(bytepc)) % 100;    /* remaining 100ths. */
	  }
	  
	  if (!bq)
	    printf("(%*d): ", fieldwidth, domp -> reqs);
	  else if (bytepc1 > 0 || bytepc2 > 0)
	    printf("(%*d):(%3d.%02d%%): ", fieldwidth, domp -> reqs, bytepc1, bytepc2);
	  else
	    printf("(%*d):         : ", fieldwidth, domp -> reqs);
	  
	  tempp = domp -> id;
	  while ((tempp = strchr(tempp, '.')) != NULL) {
	    printf("  ");    /* print two spaces for each dot in name */
	    tempp++;
	  }
	  if (i == DOMHASHSIZE - 1)
	    printf("  ");    /* and two more for numerical domains */
	  printf("%s", domp -> id);
	  
	  if (domp -> name[0] != '?')    /* print name */
	    printf(" (%s)", domp -> name);
	  
	  printf("\n");
	  
	  domp = domp -> next;
	  
	}    /* end while */
	
      }

    }   /* end for (i = running over domains) */
    
  }   /* end if (oq) */

  /* Directory report */
   
  if (iq) {
 
    printf("
\n\n
\n

Directory Report

\n\n"); printf("\n\n

(Go To: Top"); if (mq) printf(": Monthly report"); if (dq) printf(": Daily summary"); if (hq) printf(": Hourly summary"); if (oq) printf(": Domain report"); if (rq) printf(": Request report"); printf(")\n

");
    
    tempint = 10000;
    for (fieldwidth = 5; dir_max_reqs / tempint >= 10; fieldwidth++)
      tempint *= 10;
    
    for (i = 5; i < fieldwidth; i++)
      printf(" ");
    printf("#reqs: ");
    if (bq)
      printf("%%bytes: ");
    printf("directory\n");
    for (i = 1; i <= fieldwidth; i++)
      printf("-");
    if (bq)
      printf("  ------");
    printf("  ---------\n");
    
    
    for(dirp = dirsorthead; dirp -> name[0] != '\0'; dirp = dirp -> next) {
      if (bq) {
	bytepc = dirp -> bytes / (total_bytes / 10000);
	bytepc1 = ((int)(bytepc)) / 100;    /* whole number of %bytes */
	bytepc2 = ((int)(bytepc)) % 100;    /* remaining 100ths. */
      }
      if (!bq)
	printf("%*d: %s\n", fieldwidth, dirp -> reqs, dirp -> name);
      else if (bytepc1 > 0 || bytepc2 > 0)
	printf("%*d:%3d.%02d%%: %s\n", fieldwidth, dirp -> reqs, bytepc1,
	       bytepc2, dirp -> name);
      else
	printf("%*d:       : %s\n", fieldwidth, dirp -> reqs, dirp -> name);
    }
      
  }

  /* Request report */

  if (rq) {

    printf("
\n\n
\n

Request Report

\n\n"); printf("\n\n

(Go To: Top"); if (mq) printf(": Monthly report"); if (dq) printf(": Daily summary"); if (hq) printf(": Hourly summary"); if (oq) printf(": Domain report"); if (iq) printf(": Directory report"); printf(")\n

");
    
    tempint = 10000;
    for (fieldwidth = 5; url_max_reqs / tempint >= 10; fieldwidth++)
      tempint *= 10;
    
    for (i = 5; i < fieldwidth; i++)
      printf(" ");
    printf("#reqs: ");
    if (bq)
      printf("%%bytes: ");
    printf("filename\n");
    for (i = 1; i <= fieldwidth; i++)
      printf("-");
    if (bq)
      printf("  ------");
    printf("  --------\n");

    for(urlp = urlsorthead; urlp -> name[0] != '\0'; urlp = urlp -> next) {
      if (bq) {
	bytepc = urlp -> bytes / (total_bytes / 10000);
	bytepc1 = ((int)(bytepc)) / 100;    /* whole number of %bytes */
	bytepc2 = ((int)(bytepc)) % 100;    /* remaining 100ths. */
      }
      if ((kq == 2) ||     /* if we want to link to everything ... */
	  ((strcmp(urlp -> name + MAX(strlen(urlp -> name) - 5, 0), ".html") == 0
	  || strcmp(urlp -> name + MAX(strlen(urlp -> name) - 4, 0), ".htm") == 0
	  || urlp -> name[strlen(urlp -> name) - 1] == '/')
	  && kq == 1)) {   /* ... or it is a page, and we want to link to pages */
	if (!bq)
	  printf("%*d: %s\n", fieldwidth, urlp -> reqs,
		 urlp -> name, urlp -> name);
	else if (bytepc1 > 0 || bytepc2 > 0)
	  printf("%*d:%3d.%02d%%: %s\n", fieldwidth,
		 urlp -> reqs, bytepc1, bytepc2, urlp -> name, urlp -> name);
	else
	  printf("%*d:       : %s\n", fieldwidth,
		 urlp -> reqs, urlp -> name, urlp -> name);
      }
      else if (!bq)
	printf("%*d: %s\n", fieldwidth, urlp -> reqs, urlp -> name);
      else if (bytepc1 > 0 || bytepc2 > 0)
	printf("%*d:%3d.%02d%%: %s\n", fieldwidth, urlp -> reqs, bytepc1,
	       bytepc2, urlp -> name);
      else printf("%*d:       : %s\n", fieldwidth, urlp -> reqs, urlp -> name);
    }
      
  }

  /* Bit at the bottom of the page */

  printf("
\n\n
\nThis analysis was produced by analog%s.\n", VERSION); gettimeofday(&stoptime, &tzp); #ifdef PROF timeout += stoptime.tv_usec - lasttime.tv_usec + (stoptime.tv_sec - lasttime.tv_sec) * 1000000; #endif stoptime.tv_sec -= starttime.tv_sec; stoptime.tv_usec -= starttime.tv_usec; /* so now measures elapsed time */ if (total_fail_reqs + total_succ_reqs + total_other_reqs == 0) /* i.e. empty or corrupt logfile */ printf("
Running time: %d minute%s, %d second%s.
\n", stoptime.tv_sec / 60, ((stoptime.tv_sec / 60) == 1)?"":"s", stoptime.tv_sec % 60, ((stoptime.tv_sec % 60) == 1)?"":"s"); else printf("
Running time: %d minute%s, %d second%s (%d microseconds per request). \n", stoptime.tv_sec / 60, ((stoptime.tv_sec / 60) == 1)?"":"s", stoptime.tv_sec % 60, ((stoptime.tv_sec % 60) == 1)?"":"s", (stoptime.tv_sec * 1000000 + stoptime.tv_usec) / (total_fail_reqs + total_succ_reqs + total_other_reqs)); printf("\n\n

(Go To: Top"); if (mq) printf(": Monthly report"); if (dq) printf(": Daily summary"); if (hq) printf(": Hourly summary"); if (oq) printf(": Domain report"); if (iq) printf(": Directory report"); if (rq) printf(": Request report"); printf(")\n"); printf("\n\n\n"); #ifdef PROF timetot = timein + timeout + timegets + timescan1 + timescan2 + timescan + timereq + timedom + timedir + timehost + timedomsort + timeout + timeother; timetot /= 100; fprintf(stderr, "Initial time = %10d (%5.2f%%)\n", timein, (double)timein / (double)timetot); fprintf(stderr, "fgets time = %10d (%5.2f%%)\n", timegets, (double)timegets / (double)timetot); fprintf(stderr, "sscanf time 1 = %10d (%5.2f%%)\n", timescan1, (double)timescan1 / (double)timetot); fprintf(stderr, "sscanf time 2 = %10d (%5.2f%%)\n", timescan2, (double)timescan2 / (double)timetot); fprintf(stderr, "sscanf time 3 = %10d (%5.2f%%)\n", timescan, (double)timescan / (double)timetot); fprintf(stderr, "Request time = %10d (%5.2f%%)\n", timereq, (double)timereq / (double)timetot); fprintf(stderr, "Domain time = %10d (%5.2f%%)\n", timedom, (double)timedom / (double)timetot); fprintf(stderr, "Directory time = %10d (%5.2f%%)\n", timedir, (double)timedir / (double)timetot); fprintf(stderr, "Host time = %10d (%5.2f%%)\n", timehost, (double)timehost / (double)timetot); fprintf(stderr, "Sort time = %10d (%5.2f%%)\n", timesort, (double)timesort / (double)timetot); fprintf(stderr, "Dom. sort time = %10d (%5.2f%%)\n", timedomsort, (double)timedomsort / (double)timetot); fprintf(stderr, "Output time = %10d (%5.2f%%)\n", timeout, (double)timeout / (double)timetot); fprintf(stderr, "Other time = %10d (%5.2f%%)\n", timeother, (double)timeother / (double)timetot); #endif return(OK); }