From 2cecfd42a5d14836b779782fc714989d354b4733 Mon Sep 17 00:00:00 2001 From: Lynne Date: Mon, 25 Feb 2019 11:17:06 +1000 Subject: [PATCH] added support for learning from CW'd posts --- config.json | 3 ++- functions.py | 8 ++++++-- main.py | 17 +++++++++++------ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/config.json b/config.json index cc02a33..4d459f2 100644 --- a/config.json +++ b/config.json @@ -1,4 +1,5 @@ { "site": "https://botsin.space", - "cw": null + "cw": null, + "learn_from_cw": false } \ No newline at end of file diff --git a/functions.py b/functions.py index c21271a..7e0d917 100755 --- a/functions.py +++ b/functions.py @@ -7,6 +7,8 @@ import markovify from bs4 import BeautifulSoup import re, multiprocessing, sqlite3, shutil, os, json +cfg = json.load(open('config.json')) + def make_sentence(output): class nlt_fixed(markovify.NewlineText): #modified version of NewlineText that never rejects sentences def test_sentence_input(self, sentence): @@ -16,7 +18,10 @@ def make_sentence(output): db = sqlite3.connect("toots-copy.db") db.text_factory=str c = db.cursor() - toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall() + if cfg['learn_from_cw']: + toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall() + else: + toots = c.execute("SELECT content FROM `toots` WHERE cw = 0 ORDER BY RANDOM() LIMIT 10000").fetchall() toots_str = "" for toot in toots: toots_str += "\n{}".format(toot[0]) @@ -32,7 +37,6 @@ def make_sentence(output): tries = tries + 1 sentence = re.sub("^(?:@\u202B[^ ]* )*", "", sentence) #remove leading pings (don't say "@bob blah blah" but still say "blah @bob blah") - sentence = re.sub("^(?:@\u200B[^ ]* )*", "", sentence) output.send(sentence) diff --git a/main.py b/main.py index e1d9247..80d18b1 100755 --- a/main.py +++ b/main.py @@ -18,7 +18,8 @@ except: shutil.copy2("config.sample.json", "config.json") cfg = json.load(open('config.json', 'r')) -#config.json *MUST* contain the instance URL, the instance blacklist (for dead/broken instances), and the CW text. if they're not provided, we'll fall back to defaults. +#config.json should contain the instance URL, the instance blacklist (for dead/broken instances), and the CW text. if they're not provided, we'll fall back to defaults. +# TODO: this is pretty messy if 'site' not in cfg: cfg['website'] = "https://botsin.space" if 'cw' not in cfg: @@ -28,6 +29,8 @@ if 'instance_blacklist' not in cfg: "bofa.lol", "witches.town" ] +if 'learn_from_cw' not in cfg: + cfg['learn_from_cw'] = False #if the user is using a (very!) old version that still uses the .secret files, migrate to the new method if os.path.exists("clientcred.secret"): @@ -82,7 +85,11 @@ following = client.account_following(me.id) db = sqlite3.connect("toots.db") db.text_factory=str c = db.cursor() -c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID") +c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, cw INT NOT NULL DEFAULT 0, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID") +try: + c.execute("ALTER TABLE `toots` ADD COLUMN cw INT NOT NULL DEFAULT 0") +except: + pass # column already exists db.commit() def handleCtrlC(signal, frame): @@ -157,9 +164,6 @@ for f in following: # its a toost baby content = oi['object']['content'] - if oi['object']['summary'] != None and oi['object']['summary'] != "": - #don't download CW'd toots. if you want your bot to download and learn from CW'd toots, replace "continue" with "pass". (todo: add a config.json option for this) - continue toot = extract_toot(content) # print(toot) try: @@ -170,8 +174,9 @@ for f in following: done = True break pid = patterns["pid"].search(oi['object']['id']).group(0) - c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", ( + c.execute("REPLACE INTO toots (id, cw, userid, uri, content) VALUES (?, ?, ?, ?)", ( pid, + 1 if (oi['object']['summary'] != None and oi['object']['summary'] != "") else 0, f.id, oi['object']['id'], toot