addcrawlobjects
This commit is contained in:
@@ -45,7 +45,7 @@ class LvasController < ApplicationController
|
|||||||
@beispiel=Beispiel.new
|
@beispiel=Beispiel.new
|
||||||
@toolbar_elements =[]
|
@toolbar_elements =[]
|
||||||
@toolbar_elements<<{:hicon=>'icon-pencil', :icon=>:pencil,:text =>I18n.t('common.manage'),:path => verwalten_lva_path(@lva)} if can? :verwalten, @lva
|
@toolbar_elements<<{:hicon=>'icon-pencil', :icon=>:pencil,:text =>I18n.t('common.manage'),:path => verwalten_lva_path(@lva)} if can? :verwalten, @lva
|
||||||
|
@crawlobjects = @lva.crawlobjects.roots.accessible_by(current_ability)
|
||||||
end
|
end
|
||||||
def verwalten
|
def verwalten
|
||||||
@lva = Lva.find_by_id(params[:id])
|
@lva = Lva.find_by_id(params[:id])
|
||||||
|
|||||||
58
app/models/crawlobject.rb
Normal file
58
app/models/crawlobject.rb
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# A node in a tree of crawled content (acts_as_nested_set), attached to an
# owner record through the polymorphic +something+ association.
#
# +raw+ holds the crawled item as a JSON string; +objtype+ selects how that
# JSON is interpreted:
#   1 - ET forum article (its JSON carries the comments under "comments")
#   2 - ET forum comment, stored as a child of its article
class Crawlobject < ActiveRecord::Base
  OBJTYPE_ARTICLE = 1
  OBJTYPE_COMMENT = 2

  # NOTE(review): :type and :referenced have no matching column in the
  # create_crawlobjects / add_hash_to_crawlobjects migrations -- confirm
  # whether they can be dropped from this list.
  attr_accessible :children_count, :crawltime, :crawlurl, :depth, :lft, :name, :parent_id, :published_at, :raw, :referenced, :rgt, :schematype, :text, :type, :url
  acts_as_nested_set

  belongs_to :something, :polymorphic => true

  # Creates or refreshes one child Crawlobject per comment embedded in this
  # article's JSON. Does nothing unless this record is an article.
  #
  # A comment whose exact JSON is already stored (same objhash) is skipped.
  # Otherwise the comment is matched against existing children by objhash2
  # and either inserted as a new child or used to update the matching one.
  def parse_children
    return unless objtype == OBJTYPE_ARTICLE

    # An article without a "comments" key simply has no children to import.
    (json["comments"] || []).each do |comment|
      comment_json = comment.to_json
      next if children.where(:objhash => Digest::SHA512.hexdigest(comment_json)).exists?

      candidate = children.new(:raw => comment_json, :crawlurl => crawlurl)
      candidate.objtype = OBJTYPE_COMMENT
      candidate.parse_object
      candidate.calc_hash

      existing = children.where(:objhash2 => candidate.objhash2).first
      if existing.nil?
        candidate.save
      else
        # Same identity, changed content: refresh the stored child.
        existing.raw = comment_json
        existing.parse_object
        existing.calc_hash
        existing.save
      end
    end
  end

  # Copies the fields of interest out of the raw JSON into the record's
  # columns, according to objtype. Records with any other objtype are left
  # untouched (and their raw JSON is not parsed).
  #
  # Article fields are mandatory (a missing one raises NoMethodError);
  # comment name, time and text are optional and become nil when absent.
  def parse_object
    return unless [OBJTYPE_ARTICLE, OBJTYPE_COMMENT].include?(objtype)

    data = json # parse once instead of once per field
    self.schematype = data["type"].first
    properties = data["properties"]

    if objtype == OBJTYPE_ARTICLE
      self.name = properties["name"].first
      self.url = properties["url"].first
      self.published_at = properties["dateCreated"].first
    else
      self.url = properties["replyToUrl"].first
      self.name = properties["name"].try(:first)
      self.published_at = properties["commentTime"].try(:first)
      self.text = properties["commentText"].try(:first)
    end
  end

  # Recomputes both fingerprints:
  #   objhash  - SHA512 of the raw JSON (changes with any content change)
  #   objhash2 - SHA512 of url + name + schematype + published_at (UTC)
  #
  # published_at may be nil (comments without a commentTime); it then
  # contributes an empty string instead of raising.
  def calc_hash
    self.objhash = Digest::SHA512.hexdigest(raw)
    identity = [url, name, schematype, published_at.try(:utc)].map(&:to_s).join
    self.objhash2 = Digest::SHA512.hexdigest(identity)
  end

  # Returns the raw payload parsed as JSON. Parsed afresh on every call, so
  # it always reflects the current value of +raw+.
  def json
    JSON.parse(raw)
  end
end
|
||||||
@@ -46,7 +46,7 @@ class Lva < ActiveRecord::Base
|
|||||||
has_many :beispiele , :class_name => "Beispiel"
|
has_many :beispiele , :class_name => "Beispiel"
|
||||||
has_and_belongs_to_many :lecturers
|
has_and_belongs_to_many :lecturers
|
||||||
has_many :nlinks, as: :link
|
has_many :nlinks, as: :link
|
||||||
|
has_many :crawlobjects, :as=>:something
|
||||||
# scope :search, ->(query) {where("name like ? or lvas.desc like ?", "%#{query}%", "%#{query}%")}
|
# scope :search, ->(query) {where("name like ? or lvas.desc like ?", "%#{query}%", "%#{query}%")}
|
||||||
|
|
||||||
validates :lvanr,:format=>{ :with => /^[0-9][0-9][0-9]\.[0-9A][0-9][0-9]$/}, :presence=>true, :uniqueness=>true # , :uniqueness=>true # LVA-Nummer muss das Format 000.000 besitzen (uniqueness?) oder 000 für nicht
|
validates :lvanr,:format=>{ :with => /^[0-9][0-9][0-9]\.[0-9A][0-9][0-9]$/}, :presence=>true, :uniqueness=>true # , :uniqueness=>true # LVA-Nummer muss das Format 000.000 besitzen (uniqueness?) oder 000 für nicht
|
||||||
@@ -235,4 +235,32 @@ class Lva < ActiveRecord::Base
|
|||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# Downloads the ET forum items for this record's +forumlink+ and upserts one
# article-level Crawlobject (objtype 1) per returned item.
#
# The download is delegated to the external script
# ../microdata/downloadlogin.py, whose standard output is parsed as a JSON
# array. NOTE(review): the script path is relative to the process working
# directory -- confirm where this is expected to run from.
#
# For each item:
# * if an article with the same raw JSON (objhash) exists, it is skipped;
# * otherwise, if an article with the same objhash2 exists, that article is
#   refreshed from the new JSON;
# * otherwise a new article is saved with this record as its owner.
#
# Returns the parsed array of items.
def read_et_forum
  # Argument-array form: the link is handed to the script as a single argv
  # entry and never passes through a shell, so it cannot inject commands.
  command = ['python', '../microdata/downloadlogin.py']
  link = forumlink.to_s
  command << link unless link.empty?
  articles = JSON.parse(IO.popen(command, &:read))

  articles.each do |article|
    raw = article.to_json
    next if Crawlobject.where(:objhash => Digest::SHA512.hexdigest(raw), :objtype => 1).exists?

    candidate = Crawlobject.new(:raw => raw)
    candidate.objtype = 1
    candidate.parse_object
    candidate.calc_hash
    candidate.something = self

    # Restrict the lookup to articles so a comment that happens to share the
    # same objhash2 is never picked up and overwritten.
    existing = Crawlobject.where(:objhash2 => candidate.objhash2, :objtype => 1).first
    if existing.nil?
      candidate.save
    else
      existing.raw = raw
      existing.parse_object
      existing.calc_hash
      existing.save
    end
  end
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
|||||||
11
app/views/crawlobjects/_crawlobject.html.erb
Normal file
11
app/views/crawlobjects/_crawlobject.html.erb
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
<%# Renders one crawlobject as a list item linking to its url, followed by a nested list of its children, each rendered through this same partial. %>
<%# Children are loaded once here, instead of a COUNT query followed by a second query to iterate them. %>
<% child_nodes = crawlobject.children.to_a %>
<li><%= link_to crawlobject.url do %> <b> <%= crawlobject.name %> (<%= crawlobject.published_at %>)</b> <%= crawlobject.text %>
  <% end %>

  <% if child_nodes.any? %>
    <ul>
      <% child_nodes.each do |cc| %>
        <%= render cc %>
      <% end %>
    </ul>
  <% end %>
</li>
|
||||||
@@ -108,6 +108,14 @@
|
|||||||
<%= render 'beispiele/form_bulk' %>
|
<%= render 'beispiele/form_bulk' %>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<%# Tree of crawled items for this LVA. Each item is rendered through its own partial, which emits an <li>, so the items are wrapped in a <ul> to keep the markup valid. %>
<div class="row-fluid">
  <div class="span12">
    <ul>
      <% @crawlobjects.each do |co| %>
        <%= render co %>
      <% end %>
    </ul>
  </div>
</div>
|
||||||
|
|
||||||
<% end %>
|
<% end %>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
22
db/migrate/20150330191029_create_crawlobjects.rb
Normal file
22
db/migrate/20150330191029_create_crawlobjects.rb
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# Creates the crawlobjects table: the crawled payload and the fields
# extracted from it, a polymorphic owner reference (+something+), and the
# nested-set bookkeeping columns (parent_id, lft, rgt, depth,
# children_count).
class CreateCrawlobjects < ActiveRecord::Migration
  def change
    create_table :crawlobjects do |t|
      t.string :name
      t.text :text
      t.text :raw
      t.integer :objtype
      t.string :schematype
      t.string :crawlurl
      t.string :url
      t.timestamp :crawltime
      t.timestamp :published_at
      t.references :something, :polymorphic => true
      t.integer :parent_id
      t.integer :lft, :null => false
      t.integer :rgt, :null => false
      t.integer :depth, :null => false, :default => 0
      t.integer :children_count, :null => false, :default => 0
      t.timestamps
    end

    # Indexes are declared explicitly rather than through a column-level
    # :index option, whose support on plain columns depends on the Rails
    # version (NOTE(review): confirm against the version this app runs).
    add_index :crawlobjects, :objtype
    add_index :crawlobjects, :parent_id
    add_index :crawlobjects, :lft
    add_index :crawlobjects, :rgt
    # Supports looking up the crawlobjects that belong to one owner record.
    add_index :crawlobjects, [:something_id, :something_type]
  end
end
|
||||||
14
db/migrate/20150330205419_add_hash_to_crawlobjects.rb
Normal file
14
db/migrate/20150330205419_add_hash_to_crawlobjects.rb
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
# Adds the two fingerprint columns used to detect already-crawled items:
# objhash and objhash2. Both are looked up with equality conditions, so
# each gets its own index.
class AddHashToCrawlobjects < ActiveRecord::Migration
  def up
    add_column :crawlobjects, :objhash, :string
    add_column :crawlobjects, :objhash2, :string

    # Created explicitly: an :index option on a column inside change_table
    # is not guaranteed to create an index on every Rails version.
    add_index :crawlobjects, :objhash
    add_index :crawlobjects, :objhash2
  end

  def down
    # NOTE(review): relies on the database dropping a column's index along
    # with the column -- confirm for the adapter in use.
    remove_column :crawlobjects, :objhash
    remove_column :crawlobjects, :objhash2
  end
end
|
||||||
20
spec/factories/crawlobjects.rb
Normal file
20
spec/factories/crawlobjects.rb
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
FactoryGirl.define do
  # A standalone article-type crawlobject (objtype 1) whose raw payload is
  # valid JSON carrying the keys the model reads for an article.
  #
  # Only attributes backed by a crawlobjects column are set. The nested-set
  # bookkeeping columns (parent_id, lft, rgt, depth, children_count) are
  # deliberately left out -- presumably acts_as_nested_set maintains them;
  # verify against the gem in use.
  factory :crawlobject do
    name "MyString"
    text "MyText"
    raw '{"type":["MyString"],"properties":{"name":["MyString"],"url":[""],"dateCreated":["2015-03-30 21:10:29"]},"comments":[]}'
    objtype 1
    schematype "MyString"
    crawlurl "MyString"
    url ""
    crawltime "2015-03-30 21:10:29"
    published_at "2015-03-30 21:10:29"
  end
end
|
||||||
5
spec/models/crawlobject_spec.rb
Normal file
5
spec/models/crawlobject_spec.rb
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
require 'rails_helper'

# Examples for the parsing and fingerprinting of an article-type
# crawlobject (objtype 1). None of them saves a record.
RSpec.describe Crawlobject, :type => :model do
  let(:article) do
    {
      "type" => ["http://schema.org/Article"],
      "properties" => {
        "name" => ["Thread title"],
        "url" => ["http://example.com/thread/1"],
        "dateCreated" => ["2015-03-30T21:10:29Z"]
      },
      "comments" => []
    }
  end

  # Builds an unsaved article; objtype is assigned directly because it is
  # not in the model's attr_accessible list.
  def build_article(payload)
    Crawlobject.new(:raw => payload.to_json).tap { |co| co.objtype = 1 }
  end

  describe "#json" do
    it "returns the raw payload parsed as JSON" do
      expect(build_article(article).json).to eq(article)
    end
  end

  describe "#parse_object" do
    it "copies the article fields out of the raw payload" do
      crawlobject = build_article(article)
      crawlobject.parse_object

      expect(crawlobject.schematype).to eq("http://schema.org/Article")
      expect(crawlobject.name).to eq("Thread title")
      expect(crawlobject.url).to eq("http://example.com/thread/1")
      expect(crawlobject.published_at).not_to be_nil
    end
  end

  describe "#calc_hash" do
    it "stores the SHA512 of the raw payload in objhash" do
      crawlobject = build_article(article)
      crawlobject.parse_object
      crawlobject.calc_hash

      expect(crawlobject.objhash).to eq(Digest::SHA512.hexdigest(crawlobject.raw))
    end

    it "keeps objhash2 stable when only non-identity content changes" do
      original = build_article(article)
      changed = build_article(article.merge("comments" => [{ "type" => ["Comment"] }]))
      [original, changed].each do |crawlobject|
        crawlobject.parse_object
        crawlobject.calc_hash
      end

      expect(changed.objhash).not_to eq(original.objhash)
      expect(changed.objhash2).to eq(original.objhash2)
    end
  end
end
|
||||||
Reference in New Issue
Block a user