diff --git a/Gemfile b/Gemfile
index 7d4487d..e92c973 100644
--- a/Gemfile
+++ b/Gemfile
@@ -59,4 +59,6 @@ end
gem 'devise'
gem 'rufus-scheduler'
-gem 'pghero'
\ No newline at end of file
+gem 'pghero'
+gem "faraday"
+gem "nokogiri"
diff --git a/Gemfile.lock b/Gemfile.lock
index 038e8e1..371f8b6 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -110,6 +110,12 @@ GEM
erubi (1.13.1)
et-orbi (1.4.0)
tzinfo
+ faraday (2.14.0)
+ faraday-net_http (>= 2.0, < 3.5)
+ json
+ logger
+ faraday-net_http (3.4.2)
+ net-http (~> 0.5)
ffi (1.17.2-x64-mingw32)
ffi (1.17.2-x86_64-linux-gnu)
fugit (1.12.1)
@@ -133,6 +139,7 @@ GEM
thor (>= 0.14, < 2.0)
jquery-ui-rails (8.0.0)
railties (>= 3.2.16)
+ json (2.17.1)
kaminari (1.2.2)
activesupport (>= 4.1.0)
kaminari-actionview (= 1.2.2)
@@ -160,6 +167,8 @@ GEM
minitest (5.26.1)
msgpack (1.8.0)
mutex_m (0.3.0)
+ net-http (0.8.0)
+ uri (>= 0.11.1)
net-imap (0.4.22)
date
net-protocol
@@ -271,6 +280,7 @@ GEM
concurrent-ruby (~> 1.0)
tzinfo-data (1.2025.2)
tzinfo (>= 1.0.0)
+ uri (1.1.1)
warden (1.2.9)
rack (>= 2.0.9)
web-console (4.2.1)
@@ -292,9 +302,11 @@ DEPENDENCIES
administrate
bootsnap
devise
+ faraday
jbuilder
jquery-rails
jquery-ui-rails
+ nokogiri
pg (~> 1.1)
pghero
puma (>= 5.0)
diff --git a/app/jobs/training_watch_job.rb b/app/jobs/training_watch_job.rb
new file mode 100644
index 0000000..9121a7e
--- /dev/null
+++ b/app/jobs/training_watch_job.rb
@@ -0,0 +1,14 @@
+# app/jobs/training_watch_job.rb
+class TrainingWatchJob < ApplicationJob
+ queue_as :default
+
+ def perform
+ results = TrainingWatch::Checker.new.run!
+ return if results.empty?
+
+ email = ENV.fetch("TRAINING_WATCH_NOTIFY_EMAIL", "christoph@marzell.net")
+ return if email.blank?
+
+ TrainingWatchMailer.notify(email: email, results: results).deliver_now
+ end
+end
diff --git a/app/mailers/training_watch_mailer.rb b/app/mailers/training_watch_mailer.rb
new file mode 100644
index 0000000..37c87ef
--- /dev/null
+++ b/app/mailers/training_watch_mailer.rb
@@ -0,0 +1,7 @@
+# app/mailers/training_watch_mailer.rb
+class TrainingWatchMailer < ApplicationMailer
+ def notify(email:, results:)
+ @results = results
+ mail(to: email, subject: "Lehrgänge 2027: neue Treffer gefunden")
+ end
+end
\ No newline at end of file
diff --git a/app/models/training_watch_hit.rb b/app/models/training_watch_hit.rb
new file mode 100644
index 0000000..00122bc
--- /dev/null
+++ b/app/models/training_watch_hit.rb
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+# app/models/training_watch_hit.rb
+class TrainingWatchHit < ApplicationRecord
+ belongs_to :training_watch_source
+
+ validates :fingerprint, presence: true, uniqueness: true
+end
diff --git a/app/models/training_watch_source.rb b/app/models/training_watch_source.rb
new file mode 100644
index 0000000..2644199
--- /dev/null
+++ b/app/models/training_watch_source.rb
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+# app/models/training_watch_source.rb
+class TrainingWatchSource < ApplicationRecord
+ has_many :training_watch_hits, dependent: :delete_all
+
+ validates :name, :url, :kind, presence: true
+ validates :url, uniqueness: true
+ validates :kind, inclusion: { in: %w[html rss] }
+end
\ No newline at end of file
diff --git a/app/services/training_watch/checker.rb b/app/services/training_watch/checker.rb
new file mode 100644
index 0000000..67a353f
--- /dev/null
+++ b/app/services/training_watch/checker.rb
@@ -0,0 +1,183 @@
+# app/services/training_watch/checker.rb
+require "digest"
+require "faraday"
+require "nokogiri"
+
+module TrainingWatch
+ class Checker
+ DEFAULT_REGEX = /
+ \b2027\b
+ .*?
+ (start|beginn|lehrgang|curriculum|gruppe|kurs|aufnahme|bewerb|anmeld|termin)
+ /imx
+
+ USER_AGENT = "RailsTrainingWatch/1.0 (+https://your-app.example)"
+
+ Result = Struct.new(:source, :new_hits, keyword_init: true)
+
+ def initialize(sources: TrainingWatchSource.where(enabled: true), logger: Rails.logger)
+ @sources = sources
+ @logger = logger
+ @http = Faraday.new do |f|
+ f.options.timeout = 15
+ f.options.open_timeout = 10
+ f.headers["User-Agent"] = USER_AGENT
+ f.adapter Faraday.default_adapter
+ end
+ end
+
+ def run!
+ results = []
+
+ @sources.find_each do |source|
+ begin
+ new_hits = check_one!(source)
+ results << Result.new(source:, new_hits:) if new_hits.any?
+ rescue => e
+ @logger.warn("[TrainingWatch] source=#{source.id} #{source.name} failed: #{e.class}: #{e.message}")
+ ensure
+ source.update!(last_checked_at: Time.current)
+ end
+ end
+
+ results
+ end
+
+ private
+
+ def check_one!(source)
+ response = fetch(source)
+ return [] if response.nil?
+
+ case source.kind
+ when "html" then check_html(source, response)
+ when "rss" then check_rss(source, response) # optional, stub unten
+ else []
+ end
+ end
+
+ def fetch(source)
+ headers = {}
+ headers["If-None-Match"] = source.last_etag if source.last_etag.present?
+ headers["If-Modified-Since"] = source.last_modified if source.last_modified.present?
+
+ resp = @http.get(source.url, nil, headers)
+
+ if resp.status == 304
+ @logger.info("[TrainingWatch] source=#{source.id} not modified")
+ return nil
+ end
+
+ if resp.status >= 400
+ raise "HTTP #{resp.status}"
+ end
+
+ source.update!(
+ last_etag: resp.headers["etag"],
+ last_modified: resp.headers["last-modified"]
+ )
+
+ resp
+ end
+
+ def check_html(source, response)
+ html = response.body.to_s
+ doc = Nokogiri::HTML(html)
+
+ page_title = doc.at_css("title")&.text&.strip
+ text = doc.text.gsub(/\s+/, " ").strip
+
+ regex = compile_regex(source)
+ return [] unless text.match?(regex)
+
+ # Links sammeln, die auch matchen könnten (oder zumindest "2027" enthalten)
+ candidates = doc.css("a").map do |a|
+ href = a["href"].to_s.strip
+ next if href.empty?
+
+ label = a.text.to_s.gsub(/\s+/, " ").strip
+ abs = absolutize_url(source.url, href)
+ { label:, url: abs }
+ end.compact
+
+ # Wenn Links explizit 2027 enthalten, priorisieren, sonst Seitenhit
+ link_hits = candidates.select { |c| (c[:label] + " " + c[:url]).match?(/\b2027\b/i) }.first(10)
+
+ hits = []
+ if link_hits.any?
+ link_hits.each do |c|
+ hits << create_hit!(
+ source:,
+ title: c[:label].presence || page_title,
+ hit_url: c[:url],
+ snippet: build_snippet(text, regex)
+ )
+ end
+ else
+ hits << create_hit!(
+ source:,
+ title: page_title || source.name,
+ hit_url: source.url,
+ snippet: build_snippet(text, regex)
+ )
+ end
+
+ hits.compact
+ end
+
+ # Optional: RSS support (wenn du später Feed-URLs nutzt)
+ def check_rss(source, response)
+ # Minimal: RSS als Text matchen (robust genug für "2027")
+ body = response.body.to_s
+ regex = compile_regex(source)
+ return [] unless body.match?(regex)
+
+ create = create_hit!(
+ source:,
+ title: source.name,
+ hit_url: source.url,
+ snippet: build_snippet(body.gsub(/\s+/, " "), regex)
+ )
+ create ? [create] : []
+ end
+
+ def compile_regex(source)
+ return DEFAULT_REGEX if source.match_regex.blank?
+ Regexp.new(source.match_regex, Regexp::IGNORECASE | Regexp::MULTILINE)
+ rescue RegexpError
+ DEFAULT_REGEX
+ end
+
+ def build_snippet(text, regex)
+ m = text.match(regex)
+ return text[0, 280] if m.nil?
+
+ start = [m.begin(0) - 120, 0].max
+ slice = text[start, 380] || text
+ slice.strip
+ end
+
+ def absolutize_url(base, href)
+ uri = URI.parse(href)
+ return uri.to_s if uri.absolute?
+ URI.join(base, href).to_s
+ rescue
+ href
+ end
+
+ def create_hit!(source:, title:, hit_url:, snippet:, published_at: nil)
+ fp = Digest::SHA256.hexdigest([source.id, title.to_s, hit_url.to_s, snippet.to_s[0, 200]].join("|"))
+
+ TrainingWatchHit.create!(
+ training_watch_source: source,
+ title: title.to_s.strip.presence,
+ hit_url: hit_url.to_s.strip.presence,
+ snippet: snippet.to_s.strip.presence,
+ published_at:,
+ fingerprint: fp
+ )
+ rescue ActiveRecord::RecordNotUnique
+ nil
+ end
+ end
+end
diff --git a/app/views/layouts/application.html.erb b/app/views/layouts/application.html.erb
index 700d510..6386cb5 100644
--- a/app/views/layouts/application.html.erb
+++ b/app/views/layouts/application.html.erb
@@ -17,7 +17,7 @@
-
+