require 'rexml/document'
require 'rubygems'
# pretty print
require 'pp'
# Date.parse and Time.parse are defined by these stdlib extensions
require 'date'
require 'time'
# for getting the pages
require 'nokogiri'
require 'open-uri'
require 'uri'
# for the ical stuff
require 'ri_cal'
require 'tzinfo'

=begin
TODO:
- put titles into ical feed
- check how duration would be handled when more than 1 hour
- put url into the description
- setup cron
=end

# One film on the schedule, holding its English (E) and French (F) details.
#
# titleE/titleF, descE/descF and urlE/urlF are the per-language title,
# description and detail-page URL. timesE/timesF are arrays of show times
# stored as strings (Time#to_s). duration is the running time in minutes.
class Movie
  attr_accessor :titleE, :titleF, :descE, :descF, :urlE, :urlF, :timesE, :timesF, :duration

  # Running time (minutes) used until a detail page supplies a real one.
  DEFAULT_DURATION = 45

  # create a movie, setting the english title
  def initialize(titleE)
    @titleE = titleE
    @timesE = []
    @timesF = []
    @duration = DEFAULT_DURATION
  end
end

# Download +url+ and parse it as HTML, closing the handle afterwards.
#
# Kernel#open stopped accepting URLs in Ruby 3.0, so URI.open is used when
# open-uri provides it; older Rubies fall back to the patched Kernel#open.
def fetch_document(url)
  if URI.respond_to?(:open)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  else
    open(url) { |io| Nokogiri::HTML(io) }
  end
end

# Text content of the last node matching the CSS +selector+, or nil when
# nothing matches.
def last_text(doc, selector)
  node = doc.css(selector).last
  node && node.content
end

# Write the calendar +cal+ to +path+; the block form guarantees the file is
# flushed and closed.
def write_calendar(cal, path)
  File.open(path, 'w') { |file| cal.export(file) }
end

# Build a title from the given lines: runs of spaces are collapsed, each line
# is stripped, and the pieces are joined with single spaces.
def get_title(lines)
  lines.map { |line| line.squeeze(' ').strip }.join(' ').strip
end

# Parse the comma-separated clock times found on content[lineNbr] and combine
# each with +date+.
#
# Returns an array of Time#to_s strings; empty when that line does not exist.
# Raises ArgumentError (from Time.parse) when a token is not a time.
def get_times(date, content, lineNbr)
  line = content[lineNbr]
  return [] unless line

  # the &nbsp; entity comes through as a non-breaking space (bytes \302\240),
  # which String#strip leaves alone, so remove every occurrence explicitly
  tokens = line.split(',').map { |raw| raw.gsub(/\302\240/, '').strip }
  tokens.reject(&:empty?).map do |token|
    clock = Time.parse(token)
    Time.mktime(date.year, date.month, date.day, clock.hour, clock.min).to_s
  end
end

# Fill timesE/timesF for every movie found in the schedule tables of +doc+.
#
# Each table's first two lines form the movie title (the key into +movies+).
# For every line that parses as a date, the English times are read two lines
# below it and the French times four lines below it.
def parse_movie_times(doc, movies)
  doc.search('//table[@class="iconfin"]').each do |movieChunk|
    # split once; the same array serves the title and every lookup below
    lines = movieChunk.content.lines.to_a
    movie = movies[get_title(lines.first(2))]

    lines.each_with_index do |line, idx|
      begin
        date = Date.parse(line)
        # get the times it runs in english
        movie.timesE += get_times(date, lines, idx + 2)
        # and now in french
        movie.timesF += get_times(date, lines, idx + 4)
      rescue ArgumentError
        # best effort: the line is not a date (or a time was unparsable)
      end
    end
  end
end

# Resolve +href+ against +root+ and return the absolute URL as a string.
# An +href+ that is already absolute is returned unchanged.
def make_absolute(href, root)
  uri = URI.parse(href)
  uri = URI.parse(root).merge(href) if uri.relative?
  uri.to_s
end

# Record the English detail-page URL of every movie linked from the left-hand
# menu of +doc+. Link text is the key into +movies+; +root+ is the page URL
# used to resolve relative links.
def parse_movie_urls(doc, movies, root)
  doc.css('table.text_menu_gauche a').each do |link|
    href = link['href']
    # an anchor without href has nothing to resolve
    next unless href

    movies[link.content.strip].urlE = make_absolute(href, root)
  end
end

# Fetch a movie detail page and return a hash of what was found on it.
#
# Keys, each present only when the page provides the value:
#   'title' - stripped title text
#   'desc'  - description with runs of spaces collapsed
#   'dur'   - running time as a string of digits (minutes)
def parse_page(url)
  doc = fetch_document(url)
  values = {}

  # get the title
  title = last_text(doc, 'span.text20pxOrange')
  values['title'] = title.strip if title

  # get the description
  desc = last_text(doc, 'span.text11px')
  values['desc'] = desc.squeeze(' ').strip if desc

  # get the duration: keep the first run of digits rather than deleting the
  # letters of "minutes", which String#delete treats as a character set
  duration = last_text(doc, 'td.text11pxFR')
  minutes = duration && duration[/\d+/]
  values['dur'] = minutes if minutes

  values
end

# Derive the French page URL from the English one.
def get_french_url(url_eng)
  url_eng.gsub("_e.asp", "_f.asp")
end

# Fetch each movie's English and French detail pages and fill in the
# description, duration, French URL and French title.
# Movies without an English URL are left untouched.
def parse_movie_page(movies)
  movies.each_value do |movie|
    # no menu link matched this title, so there is no page to fetch
    next unless movie.urlE

    english = parse_page(movie.urlE)
    movie.descE = english["desc"]
    # keep the default duration when the page does not state one
    movie.duration = english["dur"].to_i if english["dur"]

    movie.urlF = get_french_url(movie.urlE)
    french = parse_page(movie.urlF)
    movie.titleF = french["title"]
    movie.descF = french["desc"]
  end
end

# create and return a ical
#
# One event is added per show time. +use_eng+ selects the English (true) or
# French (false) times, title, description and URL.
def create_ical(movies, use_eng)
  RiCal.Calendar do |cal|
    # set the timezones correctly so the dates match
    tz = TZInfo::Timezone.get('America/Toronto')
    RiCal::PropertyValue::DateTime.default_tzid = tz.identifier

    movies.each_value do |movie|
      times = use_eng ? movie.timesE : movie.timesF
      # fall back to the English title when no French page was parsed
      title = use_eng ? movie.titleE : (movie.titleF || movie.titleE)
      desc = use_eng ? movie.descE : movie.descF
      url = use_eng ? movie.urlE : movie.urlF
      # add in the url so that it shows up in the google calendar;
      # compact tolerates a missing description or url
      description = [desc, url].compact.join("\n\n")
      seconds = movie.duration.to_i * 60

      times.each do |time|
        start_time = Time.parse(time)
        cal.event do |event|
          event.summary = title
          event.description = description
          event.dtstart = start_time
          event.dtend = start_time + seconds
          event.location = "100 Laurier Street Gatineau, Quebec, Canada"
          event.url = url if url
        end
      end
    end
  end
end

# Scrape the IMAX schedule and write schedule_en.ics and schedule_fr.ics into
# the current directory.
def create_calenders
  url = 'http://www.civilization.ca/imax/films_horaires_t/schedule_e.asp'
  doc = fetch_document(url)

  # hash that has a new movie with the key as the english title
  movies = Hash.new { |h, k| h[k] = Movie.new(k) }

  parse_movie_times(doc, movies)
  parse_movie_urls(doc, movies, url)
  parse_movie_page(movies)

  # build both calendars before touching either output file
  cal_eng = create_ical(movies, true)
  cal_fre = create_ical(movies, false)

  write_calendar(cal_eng, "schedule_en.ics")
  write_calendar(cal_fre, "schedule_fr.ics")
end

# do everything
create_calenders