forked from bofh/fetsite
addcrawlobjects
This commit is contained in:
58
app/models/crawlobject.rb
Normal file
58
app/models/crawlobject.rb
Normal file
@@ -0,0 +1,58 @@
|
||||
# A crawled object (an ET forum article or one of its comments).
#
# Records form a nested set (acts_as_nested_set), so comments are stored as
# children of the article they were crawled with. The crawl result is kept as
# a JSON string in +raw+; #parse_object copies selected fields out of it and
# #calc_hash derives two fingerprints that callers use for de-duplication.
class Crawlobject < ActiveRecord::Base
  # objtype of an ET forum article.
  OBJTYPE_ARTICLE = 1
  # objtype of an ET forum comment (stored as child of its article).
  OBJTYPE_COMMENT = 2

  attr_accessible :children_count, :crawltime, :crawlurl, :depth, :lft, :name,
                  :parent_id, :published_at, :raw, :referenced, :rgt,
                  :schematype, :text, :type, :url

  acts_as_nested_set

  belongs_to :something, :polymorphic => true

  # Creates or refreshes one child record per entry in the article's
  # "comments" list. Does nothing unless this record is an article.
  #
  # A comment whose JSON is byte-identical to a stored child (same objhash) is
  # skipped. Otherwise the comment is matched against existing children by
  # objhash2; a match is re-parsed from the new JSON, a miss is saved as a new
  # child.
  def parse_children
    return unless objtype == OBJTYPE_ARTICLE

    # Array() turns a missing "comments" key into an empty list instead of
    # raising NoMethodError on nil.
    Array(json["comments"]).each do |comment|
      comment_json = comment.to_json
      next if children.where(:objhash => Digest::SHA512.hexdigest(comment_json)).exists?

      candidate = children.new(:raw => comment_json, :crawlurl => crawlurl)
      candidate.objtype = OBJTYPE_COMMENT
      candidate.parse_object
      candidate.calc_hash

      # Same identity (objhash2) but different content: update the stored
      # child rather than inserting a duplicate.
      existing = children.where(:objhash2 => candidate.objhash2).first
      target = existing || candidate
      if existing
        target.raw = comment_json
        target.parse_object
        target.calc_hash
      end
      target.save
    end
  end

  # Copies fields from the parsed +raw+ JSON into this record's attributes,
  # depending on objtype. Records with any other objtype are left untouched.
  # The return value is not meaningful.
  #
  # Raises NoMethodError when a field that is read without +try+ is missing
  # from the JSON (e.g. "type", "properties", an article's "name").
  def parse_object
    data = json # parse once instead of once per field

    case objtype
    when OBJTYPE_ARTICLE
      self.schematype = data["type"].first
      properties = data["properties"]
      self.name = properties["name"].first
      self.url = properties["url"].first
      self.published_at = properties["dateCreated"].first
    when OBJTYPE_COMMENT
      self.schematype = data["type"].first
      properties = data["properties"]
      self.url = properties["replyToUrl"].first
      # These comment fields are optional in the crawl output.
      self.name = properties["name"].try(:first)
      self.published_at = properties["commentTime"].try(:first)
      self.text = properties["commentText"].try(:first)
    end
  end

  # Sets both fingerprints:
  # * objhash  - SHA-512 of the raw JSON (changes with any content change)
  # * objhash2 - SHA-512 of url + name + schematype + published_at (UTC)
  def calc_hash
    self.objhash = Digest::SHA512.hexdigest(raw)
    # published_at can be nil (parse_object tolerates a missing commentTime),
    # so guard the utc call; a nil timestamp contributes an empty string.
    identity = [url, name, schematype, published_at.try(:utc)].map(&:to_s).join
    self.objhash2 = Digest::SHA512.hexdigest(identity)
  end

  # Returns +raw+ parsed as JSON. Parsed on every call, so it always reflects
  # the current value of +raw+.
  def json
    JSON.parse(raw)
  end
end
|
||||
@@ -46,7 +46,7 @@ class Lva < ActiveRecord::Base
|
||||
has_many :beispiele , :class_name => "Beispiel"
has_and_belongs_to_many :lecturers
has_many :nlinks, as: :link

# Crawled forum objects attached to this record through the polymorphic
# :something association.
has_many :crawlobjects, :as=>:something
# scope :search, ->(query) {where("name like ? or lvas.desc like ?", "%#{query}%", "%#{query}%")}

# lvanr must have the form 000.000 (the first position after the dot may also
# be "A"), must be present and must be unique.
# The pattern is anchored with \A and \z rather than ^ and $: the latter match
# at line boundaries, so a multi-line value with one valid line would pass.
# NOTE(review): the original note also mentioned a bare "000" form, which this
# pattern does not accept - confirm whether that is still intended.
validates :lvanr,:format=>{ :with => /\A[0-9][0-9][0-9]\.[0-9A][0-9][0-9]\z/}, :presence=>true, :uniqueness=>true
|
||||
@@ -234,5 +234,33 @@ class Lva < ActiveRecord::Base
|
||||
newlvas
|
||||
|
||||
end
|
||||
|
||||
|
||||
# Crawls the ET forum page behind #forumlink and stores each article found as
# a Crawlobject of objtype 1 attached to this record.
#
# Runs ../microdata/downloadlogin.py (path relative to the process working
# directory) with python and parses its standard output as JSON; the output is
# presumably an array of article objects - confirm against the script.
#
# For every article:
# * a stored article with byte-identical JSON (same objhash) is skipped;
# * a stored article with the same objhash2 is re-parsed from the new JSON;
# * otherwise a new Crawlobject is saved.
#
# Returns the parsed list of articles. Raises JSON::ParserError when the
# script output is not valid JSON.
def read_et_forum
  command = ["python", "../microdata/downloadlogin.py"]
  url = forumlink.to_s
  command << url unless url.empty?

  # The argument-list form of IO.popen bypasses the shell, so characters such
  # as "&", ";" or spaces in the URL reach the script as one literal argument
  # instead of being interpreted (or executed) by the shell.
  output = IO.popen(command, &:read)

  JSON.parse(output).each do |article|
    article_json = article.to_json
    next if Crawlobject.where(:objhash => Digest::SHA512.hexdigest(article_json), :objtype => 1).exists?

    candidate = Crawlobject.new(:raw => article_json)
    candidate.objtype = 1
    candidate.parse_object
    candidate.calc_hash
    candidate.something = self

    # Restrict the lookup to articles (objtype 1) so a record of another
    # objtype that happens to share objhash2 is never overwritten.
    existing = Crawlobject.where(:objhash2 => candidate.objhash2, :objtype => 1).first
    if existing.nil?
      candidate.save
    else
      existing.raw = article_json
      existing.parse_object
      existing.calc_hash
      existing.save
    end
  end
end
|
||||
|
||||
|
||||
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user