<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/css" href="/stylesheets/rss.css"?>
<!-- RSS 2.0 feed for the blog's "soup" tag page (see the channel title and link). -->
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/">
  <channel>
    <title>a.muse: Tag soup</title>
    <link>http://www.jessirae.com/blog/articles/tag/soup</link>
    <language>en-us</language>
    <!-- Per the RSS 2.0 spec, ttl is the number of minutes a client may cache the feed. -->
    <ttl>40</ttl>
    <description></description>
    <item>
      <title>Grab all of the href links from a Page</title>
      <!-- The description carries entity-escaped HTML, including a <pre> code sample.
           Whitespace inside it is part of the data, so do not re-indent or pretty-print it. -->
      <description>&lt;p&gt;Use &lt;a href="http://www.crummy.com/software/RubyfulSoup/"&gt;Rubyful Soup&lt;/a&gt; to get all of the hyperlinks on a page&amp;#8230;&lt;/p&gt;
	&lt;pre&gt;&lt;code&gt;soup = BeautifulSoup.new(page_content)
result = soup.find_all('a')
result.each { |tag| 
  urls[i] = tag['href']
  if urls[i].to_s.slice(0,1) != 'h' then
          #add first part of url to href link if link is internal
          urls[i] = home + urls[i]
  end
  i = i + 1
}&lt;/code&gt;&lt;/pre&gt;
</description>
      <pubDate>Tue, 06 Feb 2007 11:49:00 -0600</pubDate>
      <!-- The guid is an opaque URN, not a URL, hence isPermaLink="false". -->
      <guid isPermaLink="false">urn:uuid:8a384738-4285-4d5e-aced-960850432165</guid>
      <!-- RSS 2.0 defines <author> as an e-mail address; a bare display name belongs in
           dc:creator, whose namespace is already declared on the root element. -->
      <dc:creator>Jessica</dc:creator>
      <link>http://www.jessirae.com/blog/articles/2007/02/06/grab-all-of-the-href-links-from-a-page</link>
      <category>ruby &amp; rails</category>
      <category>technology</category>
      <category>HTML</category>
      <category>soup</category>
      <category>rubyful</category>
      <category>ruby</category>
      <category>processing</category>
      <category>hyperlinks</category>
      <trackback:ping>http://www.jessirae.com/blog/articles/trackback/8754</trackback:ping>
    </item>
  </channel>
</rss>
