Monday, February 27, 2006

[tech] Yahoo search term volume reader

Midnight with Python.


#! /usr/bin/env python

import fileinput
import re
import urllib

try:  # Python 3
    from urllib.parse import quote_plus
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import quote_plus, urlopen

class YahooReader:
    """Record search volumes for a list of search terms.

    Reads one search term per line from ``SearchTermFile``, fetches the
    inventory.overture.com suggestion page for each term, and appends a
    ``term|volume|matched term`` line to ``LogFileName``. When nothing can
    be parsed from the page the line is ``term|-1|<No Result>``.
    """

    # Set the seed data
    LogFileName = 'Yahoo.log'
    SearchTermFile = 'SearchTerms.txt'

    # Query URL prefix; the URL-encoded search term is appended to it.
    _BaseUrl = ('http://inventory.overture.com'
                '/d/searchinventory/suggestion/?term=')

    # Exact matches are highlighted with a different color and a bigger
    # font. Compiled once here instead of on every DoSearchTerm call.
    _ExactMatchPattern = re.compile(
        r'<td>'
        r'<font face="verdana,sans-serif" size=2 color=E8E8E8>'
        r'&nbsp;(?P<SearchVolume>[^<]*)'
        r'<\/td>[\r\n\t]*<td>'
        r'<font face="verdana,sans-serif" size=2 color=E8E8E8>'
        r'&nbsp;(?P<Term>[^<]*)<'
    )

    # Ordinary (non-highlighted) result row; its first occurrence is used
    # as a fallback when there is no exact match.
    # NOTE(review): '<a href=.*>' is greedy within a line; left unchanged
    # because the real page markup could not be checked.
    _FirstRowPattern = re.compile(
        r'<td>'
        r'<font face="verdana,sans-serif" size=1>'
        r'&nbsp;(?P<SearchVolume>[^<]*)'
        r'<\/td>[\r\n\t]*'
        r'<td>&nbsp;'
        r'<a href=.*>'
        r'<font face="verdana,sans-serif" size=1 color=#000000>'
        r'(?P<Term>[^<]*)'
    )

    def Run(self):
        """Call DoSearchTerm for every non-blank line of SearchTermFile.

        Raises:
            IOError/OSError: if SearchTermFile cannot be opened.
        """
        # A plain ``with open`` always releases the file, even when a
        # lookup raises; fileinput's module-level state would stay active.
        with open(self.SearchTermFile) as terms:
            for line in terms:
                term = line.strip()
                # Blank lines would only cause a pointless request.
                if term:
                    self.DoSearchTerm(term)

    def DoSearchTerm(self, ToDo):
        """Look up one search term and append the outcome to the log file.

        Args:
            ToDo: the search term. It is echoed to stdout and written to
                the log unchanged; surrounding whitespace is stripped only
                for the request URL.
        """
        print(ToDo)

        # -- Read the page. quote_plus turns spaces into '+' and also
        # -- escapes characters such as '&' or '#' that would otherwise
        # -- break the query string.
        page = self._FetchPage(self._BaseUrl + quote_plus(ToDo.strip()))

        # -- Prefer the exact match; if there is none, take the first row.
        hit = (self._ExactMatchPattern.search(page)
               or self._FirstRowPattern.search(page))

        if hit:
            record = (ToDo + '|' + hit.group('SearchVolume')
                      + '|' + hit.group('Term') + '\n')
        else:
            record = ToDo + '|-1|<No Result>\n'

        # -- Write out the result
        with open(self.LogFileName, 'a') as logfile:
            logfile.write(record)

    def _FetchPage(self, url):
        """Return the body of ``url`` as text, closing the connection."""
        response = urlopen(url)
        try:
            body = response.read()
        finally:
            response.close()
        # Python 3 returns bytes while the patterns are text.
        # Assumes the page is UTF-8 compatible -- TODO confirm.
        if not isinstance(body, str):
            body = body.decode('utf-8', 'replace')
        return body

# Script entry: look up every term listed in the search-term file.
YahooReader().Run()