# Fetch the page and parse it (assumes `url` was defined earlier -- confirm).
soup = BeautifulSoup(urllib2.urlopen(url))
# Calling a tag is BeautifulSoup shorthand for findAll: this selects every
# <a> under <body> whose CSS class is "menuItem".
for link in soup.body('a', 'menuItem'):
    item_name = link.renderContents()  # inner contents of the anchor
    item_url = link['href']
    # do something with those...
require 'rubygems'
require 'scrapi'
# Scraper for a single menu entry: for an <a class="menuItem"> element it
# captures the element text as `title` and the href attribute as `link`.
item = Scraper.define do
  process("a.menuItem", :title => :text, :link => "@href")
  result(:title, :link)
end
# Page-level scraper: every menu link inside table.newleft2 is run through the
# `item` scraper and the results are collected into the `items` array.
rune = Scraper.define do
  array(:items)
  process("table.newleft2 a.menuItem", :items => item)
  result(:items)
end
# Scrape the database search page and print one "title (link)" line per entry.
url = URI.parse("http://www.runehq.com/databasesearch.php")
items = rune.scrape(url)
# The block parameter is named `entry` so it does not collide with the outer
# local `item` (the per-link scraper): Ruby 1.8 would overwrite that local,
# and later versions would shadow it.
items.each do |entry|
  puts "#{entry.title} (#{entry.link})"
end
which returns: 'perfect' gold bar (/database.php?type=item&id=001822) 'perfect' gold ore (/database.php?type=item&id=001821) 'perfect' necklace (/database.php?type=item&id=001824) 'perfect' ring (/database.php?type=item&id=001823) 'voice of doom' potion (/database.php?type=item&id=003785) 1/2 anchovy pizza (/database.php?type=item&id=000504) 1/2 meat pizza (/database.php?type=item&id=000507) 1/2 p'apple pizza (/database.php?type=item&id=000509) 1/2 plain pizza (/database.php?type=item&id=001162)
You are not logged in; either log in or create an account to post comments.
# Print absolute item URLs from the search page: keep lines mentioning
# "square", take the 8th double-quote-delimited field (presumably the href --
# verify against the page markup), then prefix the site host. The hrefs appear
# to be site-relative (/database.php?...), so only the host is prepended;
# substituting the search-page path would yield ".../databasesearch.php?.php?...".
curl http://www.runehq.com/databasesearch.php | grep square | cut -d '"' -f 8 | sed 's|/database|http://www.runehq.com/database|g'
posted by Cat Pie Hurts at 5:45 PM on August 25