diff --git a/app/controllers/crawler_controller.rb b/app/controllers/crawler_controller.rb
new file mode 100644
index 0000000..0ceab2e
--- /dev/null
+++ b/app/controllers/crawler_controller.rb
@@ -0,0 +1,39 @@
+# Actions for reviewing crawled objects; every action requires the :doadmin ability.
+class CrawlerController < ApplicationController
+  # Lists the crawl objects of type 5 (the type Crawlobject.crawl_htu assigns).
+  def index
+    authorize! :doadmin, User
+
+    @crawlobjs = Crawlobject.where(:objtype => 5)
+  end
+
+  # Loads the attachments of the crawl object named by params[:id] when its
+  # type supports them, then redirects to its linked record (HTML) or renders JS.
+  def load_attachments
+    authorize! :doadmin, User
+
+    c = Crawlobject.find(params[:id])
+    c.load_attachments if c.has_attachments?
+    respond_to do |format|
+      format.html { redirect_to c.something }
+      format.js
+    end
+  end
+
+  # Creates or refreshes the news entry for the crawl object named by
+  # params[:id], using the user and rubrik configured for type 5.
+  def move_to_news
+    authorize! :doadmin, User
+
+    co = Crawlobject.find(params[:id])
+    if co.objtype == 5
+      cfg = Rails.configuration.crawlconfig[5]
+      co.move_to_neuigkeit(User.find(cfg["user_id"]), Rubrik.find(cfg["rubrik_id"]))
+      redirect_to co.something
+    else
+      # Without a response here Rails would fall back to rendering a
+      # move_to_news template, which this change does not add.
+      redirect_to :action => :index
+    end
+  end
+end
diff --git a/app/helpers/crawlobject_helper.rb b/app/helpers/crawlobject_helper.rb
new file mode 100644
index 0000000..59aec5c
--- /dev/null
+++ b/app/helpers/crawlobject_helper.rb
@@ -0,0 +1,4 @@
+module CrawlobjectHelper
+
+
+end
diff --git a/app/models/crawlobject.rb b/app/models/crawlobject.rb
index 0071c57..d906c9e 100644
--- a/app/models/crawlobject.rb
+++ b/app/models/crawlobject.rb
@@ -1,8 +1,43 @@
 class Crawlobject < ActiveRecord::Base
   attr_accessible :children_count, :crawltime, :crawlurl, :depth, :lft, :name, :parent_id, :published_at, :raw, :referenced, :rgt, :schematype, :text, :type, :url
 
-  acts_as_nested_set
-
+  acts_as_nested_set
+  has_many :attachments, :as=>:parent
+  belongs_to :something, :polymorphic=>true
+
+  # Crawl configuration taken from the Rails application config.
+  def self.config
+    Rails.application.config.crawlconfig
+  end
+
+  # True when this object is of type 2, the only type load_attachments handles.
+  def has_attachments?
+    objtype == 2
+  end
+
+  # Creates the Neuigkeit for a type-5 object, or refreshes the one already
+  # linked through +something+, from this object's name, text, date and url.
+  #
+  # Returns the Neuigkeit, or nil when the object is not of type 5.
+  def move_to_neuigkeit(user, rubrik)
+    return unless objtype == 5
+
+    n = something || Neuigkeit.new
+    n.title = name
+    n.text = text
+    n.datum = published_at
+    n.author = user
+    n.rubrik = rubrik
+    n.origurl = url
+    n.save
+    if something.nil?
+      # First call for this object: link the new entry so later calls update it.
+      self.something = n
+      save
+    end
+    n
+  end
+
   def parse_children
     if self.objtype == 1 # ET Forum Article loaded
       self.json["comments"].each do |com|
@@ -25,6 +60,29 @@ class Crawlobject < ActiveRecord::Base
       end
     end
   end
+
+  # Downloads every attachment URL listed in this object's JSON and stores
+  # each file as an Attachment, skipping file names that are already attached.
+  # Only type-2 objects (ET comments) are processed.
+  def load_attachments
+    return unless objtype == 2 # ET Comments only
+
+    Array(json["attachments"]).each do |url|
+      # Argument-array form: the crawled URL never passes through a shell.
+      fn = IO.popen(["python", "../microdata/download_file.py", url.to_s], &:read)
+      filename = fn.to_s.strip
+      next if filename.empty? # the script printed nothing, so there is no file to attach
+      next if attachments.where(:name => filename).exists?
+
+      File.open(File.join("/home/andreas/www/microdata/tmp", filename)) do |file|
+        a = Attachment.new
+        a.datei = file
+        a.name = filename
+        a.parent = self
+        a.save
+      end
+    end
+  end
 
   def parse_object
     if self.objtype == 1 # ET Forum Article loaded
@@ -41,9 +99,12 @@ class Crawlobject < ActiveRecord::Base
       self.name = self.json["properties"]["name"].try(:first)
       self.published_at = self.json["properties"]["commentTime"].try(:first)
       self.text = self.json["properties"]["commentText"].try(:first)
-
-
-
+    end
+    if self.objtype==5
+      self.name=self.json["name"].strip
+      self.text=self.json["text"]
+      self.published_at=Time.parse(self.json["date"].strip)
+      self.url="http://www.htu.at"
     end
   end
   def calc_hash
@@ -54,5 +115,26 @@ class Crawlobject < ActiveRecord::Base
   def json
     JSON.parse(self.raw)
   end
-
+  # Runs ../microdata/foswikicrawl.py and stores each returned item as a type-5
+  # Crawlobject; an item whose objhash2 matches a stored type-5 object
+  # refreshes that object instead of creating a new one.
+  def self.crawl_htu
+    res = JSON.parse(`python ../microdata/foswikicrawl.py`)
+    res.each do |r|
+      raw = r.to_json
+      fresh = Crawlobject.new(:raw => raw)
+      fresh.objtype = 5
+      fresh.parse_object
+      fresh.calc_hash
+      # Single lookup instead of a count query followed by a second fetch.
+      existing = Crawlobject.where(:objhash2 => fresh.objhash2, :objtype => 5).first
+      if existing
+        existing.raw = raw
+        existing.parse_object
+        existing.calc_hash
+        existing.save
+      else
+        fresh.save
+      end
+    end
+  end
 end
diff --git a/app/views/crawler/index.html.erb b/app/views/crawler/index.html.erb
new file mode 100644
index 0000000..08eeeb1
--- /dev/null
+++ b/app/views/crawler/index.html.erb
@@ -0,0 +1,9 @@
+
| <%= render co %> | ++<%= link_to "move_to_news",move_to_news_crawler_path(co) %> +<%= render co.something unless co.something.nil? %> + |