[Zope] parsing a textfile line by line -- Forgive the long message

Thomas Mühlens tomeins@yahoo.com
Wed, 21 Feb 2001 05:37:05 -0800 (PST)


That's it ... it took me a while to fiddle this code
apart but it worked.  Thanks to all of you for your
help.

--- "Farrell, Troy" <troy.farrell@wilcom.com> wrote:
> I am writing a log file parser to do (buzzword
> alert) "Streaming Media
> Metrics".  I have many logfiles from streaming
> video/audio servers.  My
> Streaming Provider makes the logs available on an
> ftp server.  I import the
> logfiles (by hand for now, soon by Xron), and parse
> them with this python
> script (not external method):
> 
> """
>   This is a set of Python functions that parse
>   and report the information contained in a
>   NetShow Server log files.
> """
> # We begin to sort the lines by spaces.
> # Unlike Real Media Servers, NetShow Server log
> # files are entirely separated by spaces.  This
> # makes the code really easy.
> 
> import string
> 
> # number of bad lines in the log file
> badline=0
> 
> for line in string.split(logfile,"\n"):
>   # process each line of the log
> 
>   e = string.split(line)
>   # e is a list of each element, split by the spaces.
>   loe = len(e)
>   # crunch the number of elements in the list
> 
>   if (loe == 44):
> 
>     # see if for some weird reason, the line is a comment line:
>     if e[0][0] == '#':
>       pass
>     else:
>       c_ip      = e[0]
>       date      = e[1]
>       time      = e[2]
>       c_dns     = e[3]
>       cs_uri_stem = e[4]
>       c_starttime = e[5]
>       x_duration  = e[6]
>       c_rate      = e[7]
>       c_status    = e[8]
>       c_playerid  = e[9]
>       c_playerversion = e[10]
>       c_playerlanguage = e[11]
>       cs_user_agent = e[12]
>       cs_referer = e[13]
>       c_hostexe   = e[14]
>       c_hostexever = e[15]
>       c_os         = e[16]
>       c_osversion  = e[17]
>       c_cpu        = e[18]
>       filelength = e[19]
>       filesize   = e[20]
>       avgbandwidth = e[21]
>       protocol   = e[22]
>       transport  = e[23]
>       audiocodec = e[24]
>       videocodec = e[25]
>       channel_url = e[26]
>       sc_bytes   = e[27]
>       c_bytes    = e[28]
>       s_pkts_sent = e[29]
>       c_pkts_received = e[30]
>       c_pkts_lost_client = e[31]
>       c_pkts_lost_net = e[32]
>       c_pkts_lost_cont_net = e[33]
>       c_resendreqs = e[34]
>       c_pkts_recovered_ecc = e[35]
>       c_pkts_recovered_resent = e[36]
>       c_buffercount = e[37]
>       c_totalbuffertime = e[38]
>       c_quality = e[39]
>       s_ip = e[40]
>       s_dns = e[41]
>       s_totalclients = e[42]
>       s_cpu_util = e[43]
>       #cs_uri_query = e[44]
> 
>       #insert all that junk into PostgreSQL
> #      context.sqlStreamInsertIntoNetShowRaw(c_ip =
> c_ip, date = date, time
> = time, c_dns = c_dns, cs_uri_stem = cs_uri_stem,
> c_starttime = c_starttime,
> x_duration = x_duration, c_rate = c_rate, c_status =
> c_status, c_playerid =
> c_playerid, c_playerversion = c_playerversion,
> c_playerlanguage =
> c_playerlanguage, cs_user_agent = cs_user_agent,
> cs_referer = cs_referer,
> c_hostexe = c_hostexe, c_hostexever = c_hostexever,
> c_os = c_os, c_osversion
> = c_osversion, c_cpu = c_cpu, filelength =
> filelength, filesize = filesize,
> avgbandwidth = avgbandwidth, protocol = protocol,
> transport = transport,
> audiocodec = audiocodec, videocodec = videocodec,
> channel_url = channel_url,
> sc_bytes = sc_bytes, c_bytes = c_bytes, s_pkts_sent
> = s_pkts_sent,
> c_pkts_received = c_pkts_received,
> c_pkts_lost_client = c_pkts_lost_client,
> c_pkts_lost_net = c_pkts_lost_net,
> c_pkts_lost_cont_net =
> c_pkts_lost_cont_net, c_resendreqs = c_resendreqs,
> c_pkts_recovered_ecc =
> c_pkts_recovered_ecc, c_pkts_recovered_resent =
> c_pkts_recovered_resent,
> c_buffercount = c_buffercount, c_totalbuffertime =
> c_totalbuffertime,
> c_quality = c_quality, s_ip = s_ip, s_dns = s_dns,
> s_totalclients =
> s_totalclients, s_cpu_util = s_cpu_util)
> 
>       #
>       # Debugging print statements.  Ughh.  That is a bunch of print statements.
>       #
> 
>       #print "c_ip %s\n" % c_ip
>       #print "date %s\n" % date
>       #print "time %s\n" % time
>       #print "c_dns %s\n" % c_dns
>       #print "cs_uri_stem %s\n" % cs_uri_stem
>       #print "c_starttime %s\n" % c_starttime
>       #print "x_duration %s\n" % x_duration
>       #print "c_rate %s\n" % c_rate
>       #print "c_status %s\n" % c_status
>       #print "c_playerid %s\n" % c_playerid
>       #print "c_playerversion %s\n" %
> c_playerversion
>       #print "c_playerlanguage %s\n" %
> c_playerlanguage
>       #print "cs_user_agent %s\n" % cs_user_agent
>       #print "cs_referer %s\n" % cs_referer
>       #print "c_hostexe %s\n" % c_hostexe
>       #print "c_hostexever %s\n" % c_hostexever
>       #print "c_os %s\n" % c_os
>       #print "c_osversion %s\n" % c_osversion
>       #print "c_cpu %s\n" % c_cpu
>       #print "filelength %s\n" % filelength
>       #print "filesize %s\n" % filesize
>       #print "avgbandwidth %s\n" % avgbandwidth
>       #print "protocol %s\n" % protocol
>       #print "transport %s\n" % transport
>       #print "audiocodec %s\n" % audiocodec
>       #print "videocodec %s\n" % videocodec
>       #print "channel_url %s\n" % channel_url
>       #print "sc_bytes %s\n" % sc_bytes
>       #print "c_bytes %s\n" % c_bytes
>       #print "s_pkts_sent %s\n" % s_pkts_sent
>       #print "c_pkts_received %s\n" %
> c_pkts_received
>       #print "c_pkts_lost_client %s\n" %
> c_pkts_lost_client
>       #print "c_pkts_lost_net %s\n" %
> c_pkts_lost_net
>       #print "c_pkts_lost_cont_net %s\n" %
> c_pkts_lost_cont_net
>       #print "c_resendreqs %s\n" % c_resendreqs
>       #print "c_pkts_recovered_ecc %s\n" %
> c_pkts_recovered_ecc
>       #print "c_pkts_recovered_resent %s\n" %
> c_pkts_recovered_resent
>       #print "c_buffercount %s\n" % c_buffercount
>       #print "c_totalbuffertime %s\n" %
> c_totalbuffertime
>       #print "c_quality %s\n" % c_quality
>       #print "s_ip %s\n" % s_ip
>       #print "s_dns %s\n" % s_dns
>       #print "s_totalclients %s\n" % s_totalclients
>       #print "s_cpu_util %s\n" % s_cpu_util
>       ##print "cs_uri_query %s\n" % cs_uri_query
>   else:
>     # loe != 44
>     # we have an error
>     if (loe > 0):
>       # see if for some weird reason, the line is a comment line:
>       if e[0][0] == '#':
>         pass
>       else:
>         outline = "###A faulty line of log file: " +
> e[0] + " with %d" %
> (loe) + " units" ###"
>         print outline
>     else:
>       print "***   An empty line in the log file!   
> ***"
>       print "*** Ususally this is the end of the log
> ***"
>       badline = badline + 1
> 
=== message truncated ===


__________________________________________________
Do You Yahoo!?
Yahoo! Auctions - Buy the things you want at great prices! http://auctions.yahoo.com/