|
|
@ -32,7 +32,7 @@ module TrainingWatch |
|
|
@sources.find_each do |source| |
|
|
@sources.find_each do |source| |
|
|
begin |
|
|
begin |
|
|
new_hits = check_one!(source) |
|
|
new_hits = check_one!(source) |
|
|
results << Result.new(source:, new_hits:) if new_hits.any? |
|
|
|
|
|
|
|
|
results << Result.new(source: source, new_hits: new_hits) if new_hits.any? |
|
|
rescue => e |
|
|
rescue => e |
|
|
@logger.warn("[TrainingWatch] source=#{source.id} #{source.name} failed: #{e.class}: #{e.message}") |
|
|
@logger.warn("[TrainingWatch] source=#{source.id} #{source.name} failed: #{e.class}: #{e.message}") |
|
|
ensure |
|
|
ensure |
|
|
@ -97,7 +97,7 @@ module TrainingWatch |
|
|
|
|
|
|
|
|
label = a.text.to_s.gsub(/\s+/, " ").strip |
|
|
label = a.text.to_s.gsub(/\s+/, " ").strip |
|
|
abs = absolutize_url(source.url, href) |
|
|
abs = absolutize_url(source.url, href) |
|
|
{ label:, url: abs } |
|
|
|
|
|
|
|
|
{ label: label, url: abs } |
|
|
end.compact |
|
|
end.compact |
|
|
|
|
|
|
|
|
# If any links explicitly contain 2027, prioritize those; otherwise fall back to a page-level hit |
|
|
# If any links explicitly contain 2027, prioritize those; otherwise fall back to a page-level hit |
|
|
@ -107,7 +107,7 @@ module TrainingWatch |
|
|
if link_hits.any? |
|
|
if link_hits.any? |
|
|
link_hits.each do |c| |
|
|
link_hits.each do |c| |
|
|
hits << create_hit!( |
|
|
hits << create_hit!( |
|
|
source:, |
|
|
|
|
|
|
|
|
source: source, |
|
|
title: c[:label].presence || page_title, |
|
|
title: c[:label].presence || page_title, |
|
|
hit_url: c[:url], |
|
|
hit_url: c[:url], |
|
|
snippet: build_snippet(text, regex) |
|
|
snippet: build_snippet(text, regex) |
|
|
@ -115,7 +115,7 @@ module TrainingWatch |
|
|
end |
|
|
end |
|
|
else |
|
|
else |
|
|
hits << create_hit!( |
|
|
hits << create_hit!( |
|
|
source:, |
|
|
|
|
|
|
|
|
source: source, |
|
|
title: page_title || source.name, |
|
|
title: page_title || source.name, |
|
|
hit_url: source.url, |
|
|
hit_url: source.url, |
|
|
snippet: build_snippet(text, regex) |
|
|
snippet: build_snippet(text, regex) |
|
|
@ -133,7 +133,7 @@ module TrainingWatch |
|
|
return [] unless body.match?(regex) |
|
|
return [] unless body.match?(regex) |
|
|
|
|
|
|
|
|
create = create_hit!( |
|
|
create = create_hit!( |
|
|
source:, |
|
|
|
|
|
|
|
|
source: source, |
|
|
title: source.name, |
|
|
title: source.name, |
|
|
hit_url: source.url, |
|
|
hit_url: source.url, |
|
|
snippet: build_snippet(body.gsub(/\s+/, " "), regex) |
|
|
snippet: build_snippet(body.gsub(/\s+/, " "), regex) |
|
|
@ -173,7 +173,7 @@ module TrainingWatch |
|
|
title: title.to_s.strip.presence, |
|
|
title: title.to_s.strip.presence, |
|
|
hit_url: hit_url.to_s.strip.presence, |
|
|
hit_url: hit_url.to_s.strip.presence, |
|
|
snippet: snippet.to_s.strip.presence, |
|
|
snippet: snippet.to_s.strip.presence, |
|
|
published_at:, |
|
|
|
|
|
|
|
|
published_at: published_at, |
|
|
fingerprint: fp |
|
|
fingerprint: fp |
|
|
) |
|
|
) |
|
|
rescue ActiveRecord::RecordNotUnique |
|
|
rescue ActiveRecord::RecordNotUnique |
|
|
|