ActiveRecord::Base
# File app/models/crawlobject.rb, line 7
# Returns the crawler configuration stored on the Rails application
# configuration (`Rails.application.config.crawlconfig`).
def self.config
  app_config = Rails.application.config
  app_config.crawlconfig
end
# File app/models/crawlobject.rb, line 122
# Runs the crawler script configured for +id+ and stores every item it reports.
#
# The script `bin/<crawlconfig[id]['bin']>` is started with the configured
# `url` as its argument; its standard output is parsed as JSON and iterated.
# Each item becomes a Crawlobject with objtype +id+. Items are matched against
# existing rows by (+objhash2+, +objtype+): a match is refreshed with the new
# raw JSON, otherwise a new row is saved.
#
# id - key into `Rails.application.config.crawlconfig`; also used as objtype.
#
# Returns the parsed collection of items.
# Raises JSON::ParserError when the script output is not valid JSON.
def self.crawl_news(id)
  source = Rails.application.config.crawlconfig[id]
  items = JSON.parse(`python #{Rails.root}/bin/#{source['bin']} #{source['url']}`)

  items.each do |item|
    raw = item.to_json

    # Build the candidate first: objhash2 is derived from the parsed fields.
    candidate = Crawlobject.new(:raw => raw)
    candidate.objtype = id
    candidate.parse_object
    candidate.calc_hash

    # One lookup instead of `count == 0` followed by `.first`: saves a query
    # and cannot hit nil if the row disappears between the two calls.
    existing = Crawlobject.where(:objhash2 => candidate.objhash2, :objtype => id).first
    if existing
      existing.raw = raw
      existing.parse_object
      existing.calc_hash
      existing.save
    else
      candidate.save
    end
  end
end
# File app/models/crawlobject.rb, line 114
# Recomputes both SHA-512 fingerprints of this record.
#
# objhash  - hex digest of the raw payload (+raw+).
# objhash2 - hex digest of url, name, schematype and the UTC publication time,
#            each converted with +to_s+ and concatenated without a separator.
#
# Returns the new objhash2 value.
def calc_hash
  self.objhash = Digest::SHA512.hexdigest(raw)

  identity_parts = [url, try(:name), schematype, published_at.try(:utc)]
  self.objhash2 = Digest::SHA512.hexdigest(identity_parts.map(&:to_s).join)
end
# File app/models/crawlobject.rb, line 10
# Tells whether this kind of object can carry attachments.
#
# Returns true when +objtype+ is 2, false otherwise.
def has_attachments?
  objtype == 2
end
# File app/models/crawlobject.rb, line 119
# Parses the stored raw payload.
#
# Returns the Ruby structure decoded from +raw+; the payload is parsed again
# on every call.
# Raises JSON::ParserError when +raw+ is not valid JSON.
def json
  payload = raw
  JSON.parse(payload)
end
# File app/models/crawlobject.rb, line 65
# Downloads the files listed under "attachments" in the raw payload and stores
# each one as an Attachment of this record. Only objtype 2 (forum comments) is
# handled; every other type is a no-op.
#
# For each URL the helper script `../microdata/download_file.py` is run and
# its standard output is taken as the name of the downloaded file, which is
# then opened from the download directory.
#
# Returns the list of attachment URLs that was iterated, or nil when the
# record is not of objtype 2.
def load_attachments
  return unless objtype == 2 # ET comments only

  # Array() turns a missing "attachments" key into an empty list.
  Array(json["attachments"]).each do |url|
    # The URL comes from crawled content, so it is passed as a separate argv
    # entry instead of being interpolated into a shell command line.
    output = IO.popen(["python", "../microdata/download_file.py", url.to_s], &:read)
    filename = output.strip
    next if filename.empty? # the script reported no file

    # Skip files already attached under this name. The previous check compared
    # against one fixed file name and therefore never matched other files.
    next if attachments.where(:name => filename).count > 0

    # Block form closes the file handle once the attachment has been saved.
    File.open("/home/andreas/www/microdata/tmp/" + filename) do |file|
      attachment = Attachment.new
      attachment.datei = file
      attachment.name = filename
      attachment.parent = self
      attachment.save
      attachments << attachment
      attachment.save
    end
  end
end
# File app/models/crawlobject.rb, line 17
# Copies this crawled news item (objtype 5 or 6) into a Neuigkeit.
#
# When no Neuigkeit is linked yet (+something+ is nil) a new one is created,
# saved and linked to this record; otherwise the linked one is overwritten
# with the current name, text, publication date and URL.
#
# user   - stored as the Neuigkeit's author.
# rubrik - stored as the Neuigkeit's rubrik.
#
# Returns the Neuigkeit when it was newly created, the result of its +save+
# when an existing one was updated, and nil for every other objtype.
# NOTE(review): the two return shapes are kept for backward compatibility.
def move_to_neuigkeit(user, rubrik)
  return unless objtype == 5 || objtype == 6

  newly_created = something.nil?
  news = newly_created ? Neuigkeit.new : something

  news.title = name
  news.text = text
  news.datum = published_at
  news.author = user
  news.rubrik = rubrik
  news.origurl = url
  saved = news.save
  return saved unless newly_created

  # Link the new Neuigkeit to this record so later calls update it in place.
  self.something = news
  save
  news
end
# File app/models/crawlobject.rb, line 42
# Creates or refreshes the child records for the comments of a forum article
# (objtype 1). Every other objtype is a no-op.
#
# Each entry under "comments" in the raw payload is serialised to JSON.
# Comments whose serialised form is already stored (same +objhash+) are
# skipped. The remaining ones are built as children of objtype 2; when a child
# with the same +objhash2+ exists it is refreshed with the new payload,
# otherwise the new child is saved.
#
# Returns the list of comments that was iterated, or nil when the record is
# not of objtype 1.
def parse_children
  return unless objtype == 1 # ET forum article

  # Array() turns a missing "comments" key into an empty list.
  Array(json["comments"]).each do |comment|
    raw = comment.to_json # serialised once and reused below
    next unless children.where(:objhash => Digest::SHA512.hexdigest(raw)).empty?

    # objhash2 depends on the parsed fields, so build the child first.
    child = children.new(:raw => raw, :crawlurl => crawlurl)
    child.objtype = 2
    child.parse_object
    child.calc_hash

    # One lookup instead of `empty?` followed by `first`.
    existing = children.where(:objhash2 => child.objhash2).first
    if existing
      existing.raw = raw
      existing.parse_object
      existing.calc_hash
      existing.save
    else
      child.save
    end
  end
end
# File app/models/crawlobject.rb, line 83
# Fills the record's attributes from its raw JSON payload according to
# +objtype+:
#
# 1    - forum article: schematype, name, url, published_at
# 2    - forum comment (part of an article): schematype, url, name,
#        published_at, text
# 5, 6 - news item: name, text, published_at and a fixed site url
#
# Other objtypes leave the record untouched. The payload is parsed once per
# call instead of once per attribute.
#
# Returns the site url for objtype 6 and nil otherwise; the callers visible in
# this file ignore the value.
def parse_object
  parse_forum_article if objtype == 1
  parse_forum_comment if objtype == 2
  parse_news_item("http://www.htu.at") if objtype == 5
  parse_news_item("http://etit.tuwien.ac.at") if objtype == 6
end

# Reads the microdata fields of a forum article (objtype 1).
def parse_forum_article
  data = json
  props = data["properties"]
  self.schematype = data["type"].first
  self.name = props["name"].first
  self.url = props["url"].first
  self.published_at = props["dateCreated"].first
end

# Reads the microdata fields of a forum comment (objtype 2); name, time and
# text are optional and become nil when absent.
def parse_forum_comment
  data = json
  props = data["properties"]
  self.schematype = data["type"].first
  self.url = props["replyToUrl"].first
  self.name = props["name"].try(:first)
  self.published_at = props["commentTime"].try(:first)
  self.text = props["commentText"].try(:first)
end

# Reads a news item (objtype 5 or 6) and assigns +site_url+ as its url.
def parse_news_item(site_url)
  data = json
  self.name = data["name"].strip
  self.text = data["text"]
  self.published_at = Time.parse(data["date"].strip)
  self.url = site_url
end

private :parse_forum_article, :parse_forum_comment, :parse_news_item
Generated with the Darkfish Rdoc Generator 2.