diff --git a/app/controllers/lvas_controller.rb b/app/controllers/lvas_controller.rb
index ee2732c..54e0c98 100755
--- a/app/controllers/lvas_controller.rb
+++ b/app/controllers/lvas_controller.rb
@@ -45,7 +45,7 @@ class LvasController < ApplicationController
     @beispiel=Beispiel.new
     @toolbar_elements =[]
     @toolbar_elements<<{:hicon=>'icon-pencil', :icon=>:pencil,:text =>I18n.t('common.manage'),:path => verwalten_lva_path(@lva)} if can? :verwalten, @lva
-
+    @crawlobjects = @lva.crawlobjects.roots.accessible_by(current_ability)
   end
   def verwalten
     @lva = Lva.find_by_id(params[:id])
diff --git a/app/models/crawlobject.rb b/app/models/crawlobject.rb
new file mode 100644
index 0000000..0071c57
--- /dev/null
+++ b/app/models/crawlobject.rb
@@ -0,0 +1,82 @@
+# A node in a nested-set tree of crawled items. Each record keeps the raw JSON
+# it was built from, a few fields extracted from that JSON, and two SHA-512
+# digests that are used to recognise items which were stored before.
+class Crawlobject < ActiveRecord::Base
+  # Values of +objtype+ that the parsing code below distinguishes.
+  OBJTYPE_ARTICLE = 1 # ET forum article
+  OBJTYPE_COMMENT = 2 # ET forum comment, stored as a child of its article
+
+  # NOTE(review): :type and :referenced are listed here, but the crawlobjects
+  # migrations do not create such columns - confirm they are still needed.
+  attr_accessible :children_count, :crawltime, :crawlurl, :depth, :lft, :name, :parent_id, :published_at, :raw, :referenced, :rgt, :schematype, :text, :type, :url
+  acts_as_nested_set
+
+  belongs_to :something, :polymorphic => true
+
+  # Creates or refreshes one child record per entry of this article's
+  # "comments" list. Does nothing unless this record is an article.
+  def parse_children
+    return unless objtype == OBJTYPE_ARTICLE
+
+    # A payload without a "comments" key has no children to import.
+    (json["comments"] || []).each do |comment|
+      comment_raw = comment.to_json
+      # Skip payloads that are already stored unchanged.
+      next unless children.where(:objhash => Digest::SHA512.hexdigest(comment_raw)).empty?
+
+      candidate = children.new(:raw => comment_raw, :crawlurl => crawlurl)
+      candidate.objtype = OBJTYPE_COMMENT
+      candidate.parse_object
+      candidate.calc_hash
+
+      # Same objhash2 but a different payload: refresh the stored child
+      # instead of inserting a second copy.
+      existing = children.where(:objhash2 => candidate.objhash2).first
+      if existing
+        existing.raw = comment_raw
+        existing.parse_object
+        existing.calc_hash
+        existing.save
+      else
+        candidate.save
+      end
+    end
+  end
+
+  # Copies the fields that matter for this record's +objtype+ from the raw
+  # JSON into the matching attributes. Other objtypes are left untouched.
+  def parse_object
+    return unless [OBJTYPE_ARTICLE, OBJTYPE_COMMENT].include?(objtype)
+
+    data = json # parse the payload once instead of once per field
+    props = data["properties"]
+    self.schematype = data["type"].first
+
+    if objtype == OBJTYPE_ARTICLE
+      self.name = props["name"].first
+      self.url = props["url"].first
+      self.published_at = props["dateCreated"].first
+    else
+      # Only replyToUrl is read unconditionally; the other keys are nil-safe.
+      self.url = props["replyToUrl"].first
+      self.name = props["name"].try(:first)
+      self.published_at = props["commentTime"].try(:first)
+      self.text = props["commentText"].try(:first)
+    end
+  end
+
+  # Recomputes both digests: +objhash+ over the raw payload, +objhash2+ over
+  # url, name, schematype and publication time.
+  def calc_hash
+    self.objhash = Digest::SHA512.hexdigest(raw)
+    # published_at is nil when a comment has no commentTime; calling utc on
+    # nil would raise, so a missing time contributes an empty string.
+    identity = [url, name, schematype, published_at.try(:utc)].map(&:to_s).join
+    self.objhash2 = Digest::SHA512.hexdigest(identity)
+  end
+
+  # Returns the raw payload parsed as JSON; the text is parsed on every call.
+  def json
+    JSON.parse(raw)
+  end
+end
diff --git a/app/models/lva.rb b/app/models/lva.rb
index c8752b4..6b56b4b 100755
--- a/app/models/lva.rb
+++ b/app/models/lva.rb
@@ -46,7 +46,7 @@ class Lva < ActiveRecord::Base
   has_many :beispiele , :class_name => "Beispiel"
   has_and_belongs_to_many :lecturers
   has_many :nlinks, as: :link
-
+  has_many :crawlobjects, :as=>:something
   # scope :search, ->(query) {where("name like ? or lvas.desc like ?", "%#{query}%", "%#{query}%")}
   validates :lvanr,:format=>{ :with => /^[0-9][0-9][0-9]\.[0-9A][0-9][0-9]$/}, :presence=>true, :uniqueness=>true # , :uniqueness=>true # LVA-Nummer muss das Format 000.000 besitzen (uniqueness?)
oder 000 für nicht
@@ -234,5 +234,44 @@ class Lva < ActiveRecord::Base
     newlvas
   end
 
 
+
+
+  # Imports the ET forum articles for this LVA: runs the external downloader
+  # script on +forumlink+, parses its JSON output and stores every article as
+  # a Crawlobject owned by this LVA. An article whose payload is already
+  # stored is skipped; one that matches a stored article's objhash2 refreshes
+  # that record instead of creating a new one.
+  def read_et_forum
+    # Without a forum link there is nothing to download.
+    return [] if forumlink.blank?
+
+    # Argument-array form: forumlink reaches the script as a single argv
+    # entry and is never interpreted by a shell.
+    output = IO.popen(['python', '../microdata/downloadlogin.py', forumlink.to_s], &:read)
+    JSON.parse(output).each do |article|
+      article_raw = article.to_json
+      next unless Crawlobject.where(:objhash => Digest::SHA512.hexdigest(article_raw), :objtype => 1).empty?
+
+      candidate = Crawlobject.new(:raw => article_raw)
+      candidate.objtype = 1 # ET forum article
+      candidate.parse_object
+      candidate.calc_hash
+      candidate.something = self
+
+      # Only articles are considered, as in the duplicate check above.
+      existing = Crawlobject.where(:objhash2 => candidate.objhash2, :objtype => 1).first
+      if existing
+        existing.raw = article_raw
+        existing.parse_object
+        existing.calc_hash
+        existing.save
+      else
+        candidate.save
+      end
+    end
+  end
+
+
 end
+
diff --git a/app/views/crawlobjects/_crawlobject.html.erb b/app/views/crawlobjects/_crawlobject.html.erb
new file mode 100644
index 0000000..3df8f92
--- /dev/null
+++ b/app/views/crawlobjects/_crawlobject.html.erb
@@ -0,0 +1,11 @@
+
  • <%= link_to crawlobject.url do %> <%= crawlobject.name %> (<%= crawlobject.published_at %>) <%= crawlobject.text %> +<% end %> + + <% if crawlobject.children.count >0 %> + +<% end %> +
  • diff --git a/app/views/lvas/show.html.erb b/app/views/lvas/show.html.erb index ed6e0d9..bede27a 100755 --- a/app/views/lvas/show.html.erb +++ b/app/views/lvas/show.html.erb @@ -108,6 +108,14 @@ <%= render 'beispiele/form_bulk' %> +
    +
    + <% @crawlobjects.each do |co| %> + <%= render co %> + <% end %> +
    +
    + <% end %>
diff --git a/db/migrate/20150330191029_create_crawlobjects.rb b/db/migrate/20150330191029_create_crawlobjects.rb
new file mode 100644
index 0000000..d3ea8b2
--- /dev/null
+++ b/db/migrate/20150330191029_create_crawlobjects.rb
@@ -0,0 +1,31 @@
+# Creates the crawlobjects table: crawled payload columns, a polymorphic
+# +something+ owner reference and the tree columns (parent_id, lft, rgt, ...).
+class CreateCrawlobjects < ActiveRecord::Migration
+  def change
+    create_table :crawlobjects do |t|
+      t.string :name
+      t.text :text
+      t.text :raw
+      t.integer :objtype
+      t.string :schematype
+      t.string :crawlurl
+      t.string :url
+      t.timestamp :crawltime
+      t.timestamp :published_at
+      t.references :something, :polymorphic => true
+      t.integer :parent_id
+      t.integer :lft, :null => false
+      t.integer :rgt, :null => false
+      t.integer :depth, :null => false, :default => 0
+      t.integer :children_count, :null => false, :default => 0
+      t.timestamps
+    end
+
+    # Explicit add_index calls: an :index option on a plain column is not
+    # honoured by every Rails version, add_index always is.
+    add_index :crawlobjects, :objtype
+    add_index :crawlobjects, :parent_id
+    add_index :crawlobjects, :lft
+    add_index :crawlobjects, :rgt
+  end
+end
diff --git a/db/migrate/20150330205419_add_hash_to_crawlobjects.rb b/db/migrate/20150330205419_add_hash_to_crawlobjects.rb
new file mode 100644
index 0000000..53bedf1
--- /dev/null
+++ b/db/migrate/20150330205419_add_hash_to_crawlobjects.rb
@@ -0,0 +1,19 @@
+# Adds the two digest columns that Crawlobject lookups filter on.
+class AddHashToCrawlobjects < ActiveRecord::Migration
+  def up
+    change_table :crawlobjects do |t|
+      t.string :objhash
+      t.string :objhash2
+    end
+
+    # Indexed explicitly so the digest lookups do not depend on :index being
+    # honoured inside change_table.
+    add_index :crawlobjects, :objhash
+    add_index :crawlobjects, :objhash2
+  end
+
+  def down
+    remove_column :crawlobjects, :objhash
+    remove_column :crawlobjects, :objhash2
+  end
+end
diff --git a/spec/factories/crawlobjects.rb b/spec/factories/crawlobjects.rb
new file mode 100644
index 0000000..a7ca249
--- /dev/null
+++ b/spec/factories/crawlobjects.rb
@@ -0,0 +1,17 @@
+FactoryGirl.define do
+  # A crawl object of objtype 1 whose raw payload is parseable JSON. The tree
+  # columns (parent_id, lft, rgt, depth, children_count) are not set here.
+  # NOTE(review): assumes acts_as_nested_set fills lft/rgt on create - confirm.
+  factory :crawlobject do
+    name "MyString"
+    text "MyText"
+    raw '{"type":["MyString"],"properties":{"name":["MyString"],"url":[""],"dateCreated":["2015-03-30 21:10:29"]},"comments":[]}'
+    objtype 1
+    schematype "MyString"
+    crawlurl "MyString"
+    url ""
+    crawltime "2015-03-30 21:10:29"
+    published_at "2015-03-30 21:10:29"
+  end
+end
diff --git a/spec/models/crawlobject_spec.rb b/spec/models/crawlobject_spec.rb
new file mode 100644
index 0000000..dee007d
--- /dev/null
+++ b/spec/models/crawlobject_spec.rb
@@ -0,0 +1,6 @@
+require 'rails_helper'
+
+# Placeholder spec: registers one pending example until real examples exist.
+RSpec.describe Crawlobject, type: :model do
+  pending "add some examples to (or delete) #{__FILE__}"
+end