# frozen_string_literal: true

require 'nokogiri'
require 'open-uri'

# Scrapes every page of a LessWrong user's comment history and writes two
# tab-separated files into the current directory:
#   karma.tsv - histogram: karma score <TAB> number of comments with that score
#   links.tsv - one row per comment: karma score <TAB> permalink, highest first

# Change this to your username to run this script for you.
# Make sure it matches the name in the 'lesswrong.com/user/USERNAME/comments/' url.
username = "Vaniver"

# Walks the comment listing starting at +url+, following each page's "Next"
# link until there is none, and accumulates results into the globals below.
#
# Reads and writes:
#   $pages - count of pages fetched so far (printed before each fetch)
#   $karma - Hash of karma score => number of comments with that score
#   $links - Array of [karma score, permalink href] pairs
#   $last  - href of the most recently followed "Next" link
#
# Pages are processed in a loop rather than by recursing once per page, so
# stack depth does not grow with the length of the comment history.
#
# @param url [String] URL of the first comments page to fetch
# @return [nil]
def parse_page(url)
  while url
    puts $pages
    $pages += 1

    # URI.open is required on Ruby 3.0+, where Kernel#open no longer accepts
    # URLs; the block form closes the response handle once parsing is done.
    page = URI.open(url) { |io| Nokogiri::HTML.parse(io) }

    # First integer (optionally negative) found in each child node of the
    # vote spans; nodes without a number yield 0 (nil.to_i).
    # The original author observed that each score shows up twice in this
    # list, hence the 2 * index lookup below.
    # NOTE(review): that doubling depends on the site's markup - confirm
    # before relying on the histogram.
    karmas = page.xpath('//span[@class="votes "]').children.map do |node|
      node.content[/-?\d+/].to_i
    end

    # href attribute of each child node of the permalink list items.
    permalinks = page.xpath('//li[@class="permalink"]').children.map do |node|
      node["href"]
    end

    # Pair each permalink with its karma and update the histogram.
    permalinks.each_with_index do |permalink, index|
      score = karmas[2 * index]
      $karma[score] += 1
      $links.push([score, permalink])
    end

    # Follow the "Next" link, if the page has one; otherwise stop.
    next_link = page.xpath('//a[text()="Next"]').first
    url = next_link && next_link["href"]
    $last = url if url
  end
end

$karma = Hash.new(0)
$links = []
$pages = 0
$last = ""

parse_page("http://lesswrong.com/user/#{username}/comments/")
puts $last
puts $pages

# Now that we have a karma hash, output it sorted by score.
# Block-form File.open closes the file even if a write raises.
File.open("karma.tsv", 'w') do |outfile|
  $karma.sort.each { |score, count| outfile.puts("#{score}\t#{count}") }
end

# Now that we have a list of links, output it, highest karma first.
# Interpolation (rather than String#+) keeps a missing href from raising.
File.open("links.tsv", 'w') do |outfile|
  $links.sort.reverse_each { |score, permalink| outfile.puts("#{score}\t#{permalink}") }
end