Improved Twitter Stats in Ruby

Posted by Alpha Thu, 03 Jan 2008 10:36:00 GMT

The previous method was a bit messy, so I’ve cleaned it up.

Download twitter_stats.tar.gz

twitter.rb

Now uses a SQLite3 database to hold the data.

require 'active_record'
require 'hpricot'
require 'open-uri'

# Connect to the SQLite3 database file that lives next to this script.
ActiveRecord::Base.establish_connection(
  :adapter => 'sqlite3',
  :dbfile => File.join(File.dirname(__FILE__), 'tweets.db')
)

# Create the tweets table on first use so a fresh database works without a
# separate SQL init step; otherwise the very first query fails with
# "no such table: tweets". The two columns are the attributes the scripts
# read and write (:tweet and :time).
# NOTE(review): column types are inferred from usage -- confirm against any
# existing tweets.db before relying on them.
unless ActiveRecord::Base.connection.tables.include?('tweets')
  ActiveRecord::Base.connection.create_table(:tweets) do |t|
    t.column :tweet, :text
    t.column :time, :datetime
  end
end

# ActiveRecord model for a single stored tweet.
class Tweet < ActiveRecord::Base
  # Returns the tweet's timestamp as a DateTime parsed from the raw stored
  # value, bypassing ActiveRecord's own type casting of the column.
  #
  # Only String values are parsed. nil (no time set) and values that are
  # still date/time objects (e.g. on a record that was just created from a
  # DateTime) are returned unchanged instead of making DateTime.parse raise.
  def time
    raw = time_before_type_cast
    raw.is_a?(String) ? DateTime.parse(raw) : raw
  end
end

# Scrapes a user's public Twitter profile pages and hands the tweets back
# one at a time via #succ, fetching further pages on demand.
# NOTE(review): callers treat the sequence as newest-first -- this relies on
# the order in which Twitter renders the page.
class Twitter
  # user - screen name; the profile is read from http://twitter.com/<user>.
  #
  # Fetches the first page immediately (network I/O) and queues the
  # highlighted current tweet followed by the remaining tweets on that page.
  def initialize(user)
    @user_url = "http://twitter.com/#{user}"

    @doc = Hpricot(open(@user_url))
    @page = 1

    @tweets = [current_tweet] + page_to_tweets
  end

  # Returns the tweet shown in the page's 'div.desc' element as
  # {:tweet => html, :time => DateTime}.
  def current_tweet
    text_node, time_node = @doc / 'div.desc' / 'p'

    {:tweet => text_node.inner_html, :time => parse_time(time_node)}
  end

  # Returns every 'tr.hentry' row of the currently loaded page as an array
  # of {:tweet => html, :time => DateTime} hashes.
  def page_to_tweets
    (@doc / 'div.tab' / 'tr.hentry').map do |row|
      text_node, time_node = row / 'span'

      # strip removes surrounding whitespace on both sides; the previous
      # regex left trailing whitespace in place.
      {:tweet => text_node.inner_html.strip, :time => parse_time(time_node)}
    end
  end

  # True when the last pagination link of the current page reads "Older".
  # Returns false (rather than raising on nil) when the page has no
  # pagination links at all, e.g. a timeline that fits on one page.
  def older?
    link = (@doc / 'div.tab' / 'div.pagination' / 'a').last
    return false if link.nil?

    !(link.inner_text =~ /Older/).nil?
  end

  # Returns the next queued tweet hash, loading the next page when the queue
  # is empty. Returns nil once there is nothing left to fetch.
  def succ
    if @tweets.empty?
      return nil unless older?

      @page += 1
      @doc = Hpricot(open("#{@user_url}?page=#{@page}"))
      @tweets = page_to_tweets
    end

    @tweets.shift
  end

  private

  # Parses the timestamp held in the 'title' attribute of the node's <abbr>.
  def parse_time(node)
    DateTime.parse(node.at('abbr')['title'])
  end
end

download_tweets.rb

#!/usr/bin/env ruby

require 'twitter'

# Downloads every tweet newer than the newest one already stored and saves
# it to the database.
#
# Usage: download_tweets.rb USERNAME
user = ARGV[0]
abort "usage: #{$0} USERNAME" if user.nil? || user.empty?

last_tweet = Tweet.find(:first, :order => 'time DESC')
# Parse the cutoff once instead of on every loop iteration; nil means the
# database is still empty.
cutoff = last_tweet && last_tweet.time

tweets = Twitter.new(user)

if cutoff.nil? || tweets.current_tweet[:time] > cutoff
  # Collect first, then store: tweets arrive from newest to oldest, so
  # saving them one by one would leave a permanent gap if the run were
  # interrupted (the next run stops at the newest stored tweet).
  fresh = []
  while (tweet = tweets.succ)
    break if cutoff && tweet[:time] <= cutoff

    fresh << tweet
  end

  # Store oldest first, all-or-nothing.
  Tweet.transaction do
    fresh.reverse.each { |attrs| Tweet.create(attrs) }
  end
end

generate_graphs.rb

#!/usr/bin/env ruby

require 'gchart'
require 'twitter'

# Optional command-line overrides; the defaults reproduce the original
# report: generate_graphs.rb [YEAR] [HOUR_OFFSET]
year = (ARGV[0] || 2007).to_i
# Hours added to each stored hour before bucketing.
# NOTE(review): -8 presumably converts UTC timestamps to US Pacific time --
# confirm against how the timestamps are stored.
hour_offset = (ARGV[1] || -8).to_i

month_data = Array.new(12, 0)  # tweets per month, January first
day_data = Array.new(7, 0)     # tweets per weekday, indexed by wday
hour_data = Array.new(24, 0)   # tweets per shifted hour of day
reply_data = Hash.new(0)       # linked @name => number of tweets mentioning it

Tweet.find(:all).each do |t|
  # Tweet#time parses the stored value on every call, so read it once.
  time = t.time
  next if time.nil? || time.year != year

  month_data[time.month - 1] += 1
  day_data[time.wday] += 1
  hour_data[(time.hour + hour_offset) % 24] += 1
  # Counts the first linked @name found in the tweet's HTML.
  reply_data[$1] += 1 if t[:tweet] =~ /@<a href="\/([^"]+)">\1<\/a>/
end

# Builds the axis-label fragment "|<min>|<max>" for a list of values.
# An empty list yields "||".
def min_max_label(data)
  ['', data.min, data.max].join('|')
end

# Builds the options shared by every chart: title, data, a fixed 400x300
# size, and axis labels (the given x labels plus a min/max y label).
chart_options = lambda do |title, data, x_labels|
  {
    :title => title,
    :data => data,
    :width => 400,
    :height => 300,
    :extras => {
      'chxt' => 'x,y',
      'chxl' => "0:|#{x_labels.join('|')}|1:#{min_max_label(data)}"
    }
  }
end
vertical = { :orientation => :vertical }

# One chart URL per line: hourly line chart, then daily and monthly bars.
hours = chart_options.call('Tweets per Hour', hour_data, (0..23).to_a)
puts GChart.line(hours).to_url

days = chart_options.call('Tweets per Day', day_data, Date::ABBR_DAYNAMES.compact)
puts GChart.bar(days.merge(vertical)).to_url

months = chart_options.call('Tweets per Month', month_data, Date::ABBR_MONTHNAMES.compact)
puts GChart.bar(months.merge(vertical)).to_url

# Reply counts, highest first: print the plain list, then chart it.
ranked = reply_data.sort_by { |_, count| count }.reverse
ranked.each { |name, count| puts "#{name}: #{count}" }
reply_labels = ranked.map { |name, _| name }
reply_data = ranked.map { |_, count| count }

replies = chart_options.call('Most Replies', reply_data, reply_labels)
puts GChart.bar(replies.merge(vertical)).to_url

Posted in , ,  | 1 comment | no trackbacks

Comments

  1. Dan Brickley said 2 months later:

    Do you have SQL init scripts somewhere? Or is the rails / active record stuff supposed to handle this automagically?

    I get:

    ruby download_to_db.rb danbri /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/gems/1.8/gems/activerecord-1.15.6/lib/active_record/connection_adapters/abstract_adapter.rb:128:in `log’: SQLite3::SQLException: no such table: tweets: SELECT * FROM tweets ORDER BY time DESC LIMIT 1 (ActiveRecord::StatementInvalid)

    Actually, I cheated by adding ‘require ‘rubygems’’ to download_to_db.rb, otherwise it didn’t find active record, but I hope that shouldn’t make a difference…

Trackbacks

Use the following link to trackback from your own site:
http://blog.kejadlen.net/trackbacks?article_id=improved-twitter-stats-in-ruby&day=03&month=01&year=2008

(leave url/email »)

   Comment Markup Help Preview comment