Skip to content

Commit d80b62e

Browse files
committed
Bypass RegExp error detecting title ~ link
Failed on 2023-07 post 723431704291901440 with title `"GitHub - gnl/playback: #>(tag'n'trace any Clojure(-Script) form to tap> and Portal with automatic last-input function replay on eval, instan",` with the RegExp parsing error > Unclosed group near index 157\n(GitHub - )?(: )?#>(tag'n'trace any Clojure(-Script) form to tap> and Portal with automatic last-input function replay on eval, instant re-rend
1 parent 421a3bb commit d80b62e

File tree

2 files changed

+70
-2
lines changed

2 files changed

+70
-2
lines changed

src/clj_tumblr_summarizer/ml.clj

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
(ns clj-tumblr-summarizer.ml
2+
(:require
3+
[clj-tumblr-summarizer.storage.fs :as fs]
4+
[clj-tumblr-summarizer.time :as time]
5+
[scicloj.ml.core :as ml]
6+
[scicloj.ml.metamorph :as mm]
7+
[scicloj.ml.dataset :as ds]))
8+
9+
;; Dataset built from the :tags of the posts that `fs/posts-in-time-range`
;; returns for `(time/previous-month)`.
;; `def` returns the var; the leading `@` derefs it so that evaluating this
;; form yields the dataset itself.
@(def tag-lists-ds
10+
(-> (->> (fs/posts-in-time-range (time/previous-month))
11+
(map #(select-keys % [:tags]))) ; keep only the :tags key of every post
12+
(ds/dataset)
13+
(ds/separate-column :tags :infer #(zipmap % (repeat 1))) ; each tag list becomes a {tag 1} map; presumably yields one column per tag - TODO confirm
14+
(ds/replace-missing :all :value 0))) ; fill missing values in all columns with 0
15+
16+
;; Parameters passed, in this order, to the :dbscan clustering step in `pipe-fn`.
;; The trailing notes record the original author's observations.
(def min-nr-neighbors 1) ; 1 => 4x, 2 => 2x cluster
17+
(def radius 1) ; > 1 => single cluster
18+
19+
;; (def ds (-> [{:tags ["app" "mobile" "travel"]}
20+
;; {:tags ["business"]}
21+
;; {:tags ["clojure" "library" "data processing"]}
22+
;; {:tags ["clojure" "tool" "docker" "devops"]}
23+
;; {:tags ["tool" "automation" "macos"]}]
24+
;; (ds/dataset)
25+
;; (ds/separate-column :tags :infer #(zipmap % (repeat 1)))
26+
;; (ds/replace-missing :all :value 0)))
27+
28+
;; Pipeline function with a single :dbscan clustering step, parameterised by
;; `min-nr-neighbors` and `radius`; :cluster-id is passed as the step's last
;; argument (presumably the name of the result column - TODO confirm).
;; The preceding map presumably assigns the id :tag-cluster to that step.
(def pipe-fn
29+
(ml/pipeline
30+
{:metamorph/id :tag-cluster}
31+
(mm/cluster :dbscan [min-nr-neighbors radius] :cluster-id))) ; `mm` is the ns alias for scicloj.ml.metamorph
32+
33+
;; Runs the pipeline in :fit mode on `tag-lists-ds`, stores the resulting
;; context in `trained-ctx`, and then threads that context through the steps
;; below to list the distinct values of the :cluster-id column.
(->
34+
@(def trained-ctx
35+
(pipe-fn {:metamorph/data tag-lists-ds
36+
:metamorph/mode :fit}))
37+
38+
:metamorph/data ; bare keyword step: `->` expands it to (:metamorph/data ctx), with no accidental not-found default
39+
(ds/select-columns :cluster-id)
40+
(ds/unique-by :cluster-id))
41+
42+
;; (-> (ds/dataset tag-lists) (ds/fit-one-hot :tags))
43+
;; (-> (ds/dataset tag-lists)
44+
;; ;(ds/add-column :id (range))
45+
;; (ds/separate-column :tags :infer #(zipmap % (repeat 1))))
46+
47+
48+
;; EXAMPLE CODE
49+
;; (def titanic-test
50+
;; (-> "https://github.com/scicloj/metamorph-examples/raw/main/data/titanic/test.csv"
51+
;; (ds/dataset {:key-fn keyword :parser-fn :string})
52+
;; (ds/add-column :Survived [""] :cycle)))
53+
54+
;; ;; construct pipeline function including Logistic Regression model
55+
;; (def pipe-fn
56+
;; (ml/pipeline
57+
;; (mm/select-columns [:Survived :Pclass])
58+
;; (mm/add-column :Survived (fn [ds] (map #(case % "1" "yes" "0" "no" nil "") (:Survived ds))))
59+
;; (mm/categorical->number [:Survived :Pclass])
60+
;; (mm/set-inference-target :Survived)
61+
;; {:metamorph/id :model}
62+
;; (mm/model {:model-type :smile.classification/logistic-regression})))
63+
64+
;; ;; execute pipeline with train data including model in mode :fit
65+
;; (def trained-ctx
66+
;; (pipe-fn {:metamorph/data titanic-train
67+
;; :metamorph/mode :fit}))

src/clj_tumblr_summarizer/output.clj

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,9 @@
119119
qt str/re-quote-replacement
120120
description' (cond-> description
121121
match (str/replace-first match ""))
122-
duplicates-link? (some->> description' (re-matches (re-pattern (str "(GitHub - )?(" (some-> project qt) ": )?"
123-
(some-> description' qt)))))]
122+
duplicates-link? (try (some->> description' (re-matches (re-pattern (str "(GitHub - )?(" (some-> project qt) ": )?"
123+
(some-> description' qt)))))
124+
(catch java.util.regex.PatternSyntaxException _))]
124125
[:span.link "👓 " [:a {:href (unredirect url)} (rm-gh summary)]
125126
(when (seq tags)
126127
(list " [" (str/join ", " tags) "]"))

0 commit comments

Comments
 (0)