From 400410476648bbd8ca8b0ff9237b09e12c7a083e Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 22:05:29 +1000 Subject: [PATCH 01/22] better commenting, handle missing config.json fields --- main.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/main.py b/main.py index 560df61..085b065 100755 --- a/main.py +++ b/main.py @@ -13,6 +13,13 @@ import requests scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses", "read:notifications"] cfg = json.load(open('config.json', 'r')) +#config.json *MUST* contain the instance URL, and the CW text. if they're not provided, we'll fall back to defaults. +if 'site' not in cfg: + cfg['website'] = "https://botsin.space" +if 'cw' not in cfg: + cfg['cw'] = "" + +#if the user is using a (very!) old version that still uses the .secret files, migrate to the new method if os.path.exists("clientcred.secret"): print("Upgrading to new storage method") cc = open("clientcred.secret").read().split("\n") From 480c425b82dfaa161b89ad03fdfd03afa86a846b Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 22:08:10 +1000 Subject: [PATCH 02/22] easier to understand logging --- main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index 085b065..65816fe 100755 --- a/main.py +++ b/main.py @@ -33,7 +33,7 @@ if os.path.exists("clientcred.secret"): if "client" not in cfg: - print("No client credentials, registering application") + print("No application info -- registering application with {}".format(cfg['site'])) client_id, client_secret = Mastodon.create_app("mstdn-ebooks", api_base_url=cfg['site'], scopes=scopes, @@ -45,12 +45,12 @@ if "client" not in cfg: } if "secret" not in cfg: - print("No user credentials, logging in") + print("No user credentials -- logging in to {}".format(cfg['site'])) client = Mastodon(client_id = cfg['client']['id'], client_secret = cfg['client']['secret'], api_base_url=cfg['site']) - print("Open this URL: {}".format(client.auth_request_url(scopes=scopes))) + print("Open this URL and authenticate to give mstdn-ebooks access to your bot's account: {}".format(client.auth_request_url(scopes=scopes))) cfg['secret'] = client.log_in(code=input("Secret: "), scopes=scopes) json.dump(cfg, open("config.json", "w+")) @@ -180,7 +180,7 @@ for f in following: r = requests.get(uri) j = r.json() - print("Downloading and parsing toots", end='', flush=True) + print("Downloading and saving toots", end='', flush=True) done = False try: while not done and len(j['orderedItems']) > 0: @@ -222,7 +222,7 @@ for f in following: print(" Done!") db.commit() except: - print("Encountered an error! Saving toots to database and continuing.") + print("Encountered an error! Saving toots to database and moving to next followed account.") db.commit() # db.close() From 829cf8288061d1cde508aa08cd344abedeb90dd4 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 22:12:31 +1000 Subject: [PATCH 03/22] added .vscode to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d23632f..ddee0d0 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ toots.db toots.db-journal toots.db-wal __pycache__/* +.vscode/ From adbf6527e25bdf79ac6956df8c3110245f1b9ccd Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 22:13:23 +1000 Subject: [PATCH 04/22] removed unused legacy code --- main.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/main.py b/main.py index 65816fe..27f0d1a 100755 --- a/main.py +++ b/main.py @@ -111,26 +111,6 @@ def handleCtrlC(signal, frame): signal.signal(signal.SIGINT, handleCtrlC) -def get_toots_legacy(client, id): - i = 0 - toots = client.account_statuses(id) - while toots is not None and len(toots) > 0: - for toot in toots: - if toot.spoiler_text != "": continue - if toot.reblog is not None: continue - if toot.visibility not in ["public", "unlisted"]: continue - t = extract_toot(toot.content) - if t != None: - yield { - "toot": t, - "id": toot.id, - "uri": toot.uri - } - toots = client.fetch_next(toots) - i += 1 - if i%20 == 0: - print('.', end='', flush=True) - for f in following: last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone() if last_toot != None: From e58097615db9d0fa6b9c449573bb9be00ad7c8dd Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 22:15:05 +1000 Subject: [PATCH 05/22] more commenting --- main.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index 27f0d1a..e4c0662 100755 --- a/main.py +++ b/main.py @@ -130,8 +130,7 @@ for f in following: if instance == "bofa.lol": print("rest in piece bofa, skipping") continue - - # print("{} is on {}".format(f.acct, instance)) + try: r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10) uri = re.search(r'template="([^"]+)"', r.text).group(1) @@ -166,12 +165,12 @@ for f in following: while not done and len(j['orderedItems']) > 0: for oi in j['orderedItems']: if oi['type'] != "Create": - continue #not a toost. fuck outta here + continue #this isn't a toot/post/status/whatever, it's a boost or a follow or some other activitypub thing. ignore # its a toost baby content = oi['object']['content'] if oi['object']['summary'] != None: - #don't download CW'd toots + #don't download CW'd toots. if you want your bot to download and learn from CW'd toots, replace "continue" with "pass". (todo: add a config.json option for this) continue toot = extract_toot(content) # print(toot) From 0a66c1db5123899e57fb60fa93dc1f8f372be7a9 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 22:16:04 +1000 Subject: [PATCH 06/22] added my @ to the error message --- create.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/create.py b/create.py index caa62a3..61218f5 100755 --- a/create.py +++ b/create.py @@ -12,9 +12,6 @@ def make_sentence(output): def test_sentence_input(self, sentence): return True #all sentences are valid <3 - # with open("corpus.txt", encoding="utf-8") as fp: - # model = nlt_fixed(fp.read()) - shutil.copyfile("toots.db", "toots-copy.db") db = sqlite3.connect("toots-copy.db") db.text_factory=str @@ -55,7 +52,7 @@ def make_toot_markov(query = None): else: toot = pin.recv() if toot == None: - toot = "Toot generation failed! Contact Lynne for assistance." + toot = "Toot generation failed! Contact Lynne (lynnesbian@fedi.lynnesbian.space) for assistance." return { "toot":toot, "media":None From 85fec32c835c3d80bda4f86cd4971cf2476c390d Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 22:47:42 +1000 Subject: [PATCH 07/22] renamed create.py to functions.py --- .gitignore | 1 + create.py => functions.py | 27 +++++++++++++++------------ gen.py | 13 ++++--------- reply.py | 4 ++-- 4 files changed, 22 insertions(+), 23 deletions(-) rename create.py => functions.py (59%) diff --git a/.gitignore b/.gitignore index ddee0d0..d0aabc5 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ toots.db-journal toots.db-wal __pycache__/* .vscode/ +.editorconfig diff --git a/create.py b/functions.py similarity index 59% rename from create.py rename to functions.py index 61218f5..80ec6fe 100755 --- a/create.py +++ b/functions.py @@ -4,15 +4,15 @@ # file, You can obtain one at http://mozilla.org/MPL/2.0/. import markovify -import json -import re, random, multiprocessing, time, sqlite3, shutil, os +from bs4 import BeautifulSoup +import re, random, multiprocessing, sqlite3, shutil, os, json def make_sentence(output): - class nlt_fixed(markovify.NewlineText): + class nlt_fixed(markovify.NewlineText): #modified version of NewlineText that never rejects sentences def test_sentence_input(self, sentence): return True #all sentences are valid <3 - shutil.copyfile("toots.db", "toots-copy.db") + shutil.copyfile("toots.db", "toots-copy.db") #create a copy of the database because reply.py will be using the main one db = sqlite3.connect("toots-copy.db") db.text_factory=str c = db.cursor() @@ -30,7 +30,10 @@ def make_sentence(output): while sentence is None and tries < 10: sentence = model.make_short_sentence(500, tries=10000) tries = tries + 1 - sentence = re.sub("^@\u202B[^ ]* ", "", sentence) + + sentence = re.sub("^(?:@\u202B[^ ]* )*", "", sentence) #remove leading pings (don't say "@bob blah blah" but still say "blah @bob blah") + sentence = re.sub("^(?:@\u200B[^ ]* )*", "", sentence) + output.send(sentence) def make_toot(force_markov = False, args = None): @@ -39,21 +42,21 @@ def make_toot(force_markov = False, args = None): def make_toot_markov(query = None): tries = 0 toot = None - while toot == None and tries < 25: + while toot == None and tries < 10: #try to make a toot 10 times pin, pout = multiprocessing.Pipe(False) p = multiprocessing.Process(target = make_sentence, args = [pout]) p.start() - p.join(10) - if p.is_alive(): + p.join(10) #wait 10 seconds to get something + if p.is_alive(): #if it's still trying to make a toot after 10 seconds p.terminate() p.join() toot = None - tries = tries + 1 + tries = tries + 1 #give up, and increment tries by one else: toot = pin.recv() - if toot == None: + if toot == None: #if we've tried and failed ten times, just give up toot = "Toot generation failed! Contact Lynne (lynnesbian@fedi.lynnesbian.space) for assistance." return { - "toot":toot, - "media":None + "toot": toot, + "media": None } diff --git a/gen.py b/gen.py index a07dcce..3143e39 100755 --- a/gen.py +++ b/gen.py @@ -5,13 +5,11 @@ from mastodon import Mastodon import argparse, sys, traceback, json -import create +import functions parser = argparse.ArgumentParser(description='Generate and post a toot.') -parser.add_argument('reply', metavar='reply', type=str, nargs='?', - help='ID of the status to reply to') parser.add_argument('-s', '--simulate', dest='simulate', action='store_true', - help="Print the toot to stdout without posting it") + help="Print the toot without actually posting it. Use this to make sure your bot's actually working.") args = parser.parse_args() @@ -24,7 +22,7 @@ client = Mastodon( api_base_url=cfg['site']) if __name__ == '__main__': - toot = create.make_toot() + toot = functions.make_toot() if not args.simulate: try: if toot['media'] != None: @@ -35,10 +33,7 @@ if __name__ == '__main__': client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = cfg['cw']) except Exception as err: toot = { - "toot": - "Mistress @lynnesbian@fedi.lynnesbian.space, something has gone terribly" \ - + " wrong! While attempting to post a toot, I received the following" \ - + " error:\n" + "\n".join(traceback.format_tb(sys.exc_info()[2])) + "toot": "An unknown error that should never happen occurred. Maybe it's because of the spoiler text, which is {}. If not, I have no idea what went wrong. This is an error message -- contact lynnesbian@fedi.lynnesbian.space for assistance.".format(cfg['cw']) } client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!") print(toot['toot']) diff --git a/reply.py b/reply.py index 36a5c0f..233ef0c 100755 --- a/reply.py +++ b/reply.py @@ -5,7 +5,7 @@ import mastodon import os, random, re, json -import create +import functions from bs4 import BeautifulSoup cfg = json.load(open('config.json', 'r')) @@ -43,7 +43,7 @@ class ReplyListener(mastodon.StreamListener): acct = "@" + notification['account']['acct'] post_id = notification['status']['id'] mention = extract_toot(notification['status']['content']) - toot = create.make_toot(True)['toot'] + toot = functions.make_toot(True)['toot'] toot = acct + " " + toot print(acct + " says " + mention) visibility = notification['status']['visibility'] From de3449ae562b5a3f60190ec63842b0d2fc1dd890 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 22:55:31 +1000 Subject: [PATCH 08/22] added extract_toot function to functions.py --- functions.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/functions.py b/functions.py index 80ec6fe..31caf36 100755 --- a/functions.py +++ b/functions.py @@ -60,3 +60,26 @@ def make_toot_markov(query = None): "toot": toot, "media": None } + +def extract_toot(toot): + soup = BeautifulSoup(toot, "html.parser") + for lb in soup.select("br"): #replace
with linebreak + lb.insert_after("\n") + lb.decompose() + + for p in soup.select("p"): #ditto for

+ p.insert_after("\n") + p.unwrap() + + for ht in soup.select("a.hashtag"): #make hashtags no longer links, just text + ht.unwrap() + + for link in soup.select("a"): #ocnvert with linebreak lb.insert_after("\n") @@ -78,7 +80,7 @@ def extract_toot(toot): link.insert_after(link["href"]) link.decompose() - toot = soup.get_text() + text = soup.get_text() text = re.sub("https://([^/]+)/(@[^ ]+)", r"\2@\1", text) #put mastodon-style mentions back in text = re.sub("https://([^/]+)/users/([^ ]+)", r"@\2@\1", text) #put pleroma-style mentions back in text = text.rstrip("\n") #remove trailing newline diff --git a/reply.py b/reply.py index 233ef0c..2e60de1 100755 --- a/reply.py +++ b/reply.py @@ -17,40 +17,25 @@ client = mastodon.Mastodon( api_base_url=cfg['site']) def extract_toot(toot): - #copied from main.py, see there for comments - soup = BeautifulSoup(toot, "html.parser") - for lb in soup.select("br"): - lb.insert_after("\n") - lb.decompose() - for p in soup.select("p"): - p.insert_after("\n") - p.unwrap() - for ht in soup.select("a.hashtag"): - ht.unwrap() - for link in soup.select("a"): - link.insert_after(link["href"]) - link.decompose() - text = map(lambda a: a.strip(), soup.get_text().strip().split("\n")) - text = "\n".join(list(text)) - text = re.sub("https?://([^/]+)/(@[^ ]+)", r"\2@\1", text) #put mentions back in - text = re.sub("^@[^@]+@[^ ]+ *", r"", text) #...but remove the initial one - text = text.lower() #for easier matching + text = functions.extract_toot(toot) + text = re.sub(r"^@[^@]+@[^ ]+\s*", r"", text) #remove the initial mention + text = text.lower() #treat text as lowercase for easier keyword matching (if this bot uses it) return text class ReplyListener(mastodon.StreamListener): - def on_notification(self, notification): - if notification['type'] == 'mention': - acct = "@" + notification['account']['acct'] + def on_notification(self, notification): #listen for notifications + if notification['type'] == 'mention': #if we're mentioned: + acct = "@" + notification['account']['acct'] #get the account's @ post_id = notification['status']['id'] mention = extract_toot(notification['status']['content']) - toot = functions.make_toot(True)['toot'] - toot = acct + " " + toot - print(acct + " says " + mention) + toot = functions.make_toot(True)['toot'] #generate a toot + toot = acct + " " + toot #prepend the @ + print(acct + " says " + mention) #logging visibility = notification['status']['visibility'] if visibility == "public": visibility = "unlisted" - client.status_post(toot, post_id, visibility=visibility, spoiler_text = cfg['cw']) - print("replied with " + toot) + client.status_post(toot, post_id, visibility=visibility, spoiler_text = cfg['cw']) #send toost + print("replied with " + toot) #logging rl = ReplyListener() -client.stream_user(rl) +client.stream_user(rl) #go! From 3e3f905839f3a5174574610b252fd2a6328519ac Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 22:57:18 +1000 Subject: [PATCH 10/22] removed unused import of random --- functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/functions.py b/functions.py index 8a11466..6d88243 100755 --- a/functions.py +++ b/functions.py @@ -5,7 +5,7 @@ import markovify from bs4 import BeautifulSoup -import re, random, multiprocessing, sqlite3, shutil, os, json +import re, multiprocessing, sqlite3, shutil, os, json def make_sentence(output): class nlt_fixed(markovify.NewlineText): #modified version of NewlineText that never rejects sentences From 9bbd659bf5bb083a1e2bd0005d944ef40c490b04 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 22:58:17 +1000 Subject: [PATCH 11/22] main.py now uses functions.py's extract_toot func --- main.py | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/main.py b/main.py index e4c0662..c56388c 100755 --- a/main.py +++ b/main.py @@ -9,6 +9,7 @@ from os import path from bs4 import BeautifulSoup import os, sqlite3, signal, sys, json, re import requests +import functions scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses", "read:notifications"] cfg = json.load(open('config.json', 'r')) @@ -56,36 +57,7 @@ if "secret" not in cfg: json.dump(cfg, open("config.json", "w+")) def extract_toot(toot): - toot = toot.replace("'", "'") - toot = toot.replace(""", '"') - soup = BeautifulSoup(toot, "html.parser") - - # this is the code that removes all mentions - for mention in soup.select("span.h-card"): - mention.a.unwrap() - mention.span.unwrap() - - # replace
with linebreak - for lb in soup.select("br"): - lb.insert_after("\n") - lb.decompose() - - # replace

with linebreak - for p in soup.select("p"): - p.insert_after("\n") - p.unwrap() - - # fix hashtags - for ht in soup.select("a.hashtag"): - ht.unwrap() - - # fix links - for link in soup.select("a"): - link.insert_after(link["href"]) - link.decompose() - - toot = soup.get_text() - toot = toot.rstrip("\n") #remove trailing newline + toot = functions.extract_toot(toot) toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning return(toot) From 2e0bf28c8ec96190a61c846eaa5a8847db1e1df6 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 23:02:36 +1000 Subject: [PATCH 12/22] funy coment --- main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/main.py b/main.py index c56388c..adbf8ab 100755 --- a/main.py +++ b/main.py @@ -150,6 +150,7 @@ for f in following: if pleroma: if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0: #we've caught up to the notices we've already downloaded, so we can stop now + #you might be wondering, "lynne, what if the instance ratelimits you after 40 posts, and they've made 60 since main.py was last run? wouldn't the bot miss 20 posts and never be able to see them?" to which i reply, "it's called mstdn-ebooks not fediverse-ebooks. pleroma support is an afterthought" done = True break pid = re.search(r"[^\/]+$", oi['object']['id']).group(0) From 0141b513a926ab68a25addbca3b3ab92cb36d7e9 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 23:05:45 +1000 Subject: [PATCH 13/22] minor fix --- main.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index adbf8ab..3a82ece 100755 --- a/main.py +++ b/main.py @@ -18,7 +18,7 @@ cfg = json.load(open('config.json', 'r')) if 'site' not in cfg: cfg['website'] = "https://botsin.space" if 'cw' not in cfg: - cfg['cw'] = "" + cfg['cw'] = None #if the user is using a (very!) old version that still uses the .secret files, migrate to the new method if os.path.exists("clientcred.secret"): @@ -154,8 +154,8 @@ for f in following: done = True break pid = re.search(r"[^\/]+$", oi['object']['id']).group(0) - c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", - (pid, + c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", ( + pid, f.id, oi['object']['id'], toot @@ -164,7 +164,6 @@ for f in following: pass except: pass #ignore any toots that don't successfully go into the DB - # sys.exit(0) if not pleroma: r = requests.get(j['prev'], timeout=15) else: @@ -176,7 +175,6 @@ for f in following: except: print("Encountered an error! Saving toots to database and moving to next followed account.") db.commit() - # db.close() print("Done!") From 4530a9729f94f373471e5af24fe8d3924824e033 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 23:07:10 +1000 Subject: [PATCH 14/22] made config.json prettier --- config.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/config.json b/config.json index 7b1521d..cc02a33 100644 --- a/config.json +++ b/config.json @@ -1 +1,4 @@ -{"site":"https://botsin.space","cw":null} \ No newline at end of file +{ + "site": "https://botsin.space", + "cw": null +} \ No newline at end of file From 66899c8226a3d51eca753ea87ebace0f74d7b857 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 11 Jan 2019 23:08:53 +1000 Subject: [PATCH 15/22] added instance blacklist --- main.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 3a82ece..6d3e921 100755 --- a/main.py +++ b/main.py @@ -14,11 +14,16 @@ import functions scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses", "read:notifications"] cfg = json.load(open('config.json', 'r')) -#config.json *MUST* contain the instance URL, and the CW text. if they're not provided, we'll fall back to defaults. +#config.json *MUST* contain the instance URL, the instance blacklist (for dead/broken instances), and the CW text. if they're not provided, we'll fall back to defaults. if 'site' not in cfg: cfg['website'] = "https://botsin.space" if 'cw' not in cfg: cfg['cw'] = None +if 'instance_blacklist' not in cfg: + cfg["instance_blacklist"] = [ + "bofa.lol", + "witches.town" + ] #if the user is using a (very!) old version that still uses the .secret files, migrate to the new method if os.path.exists("clientcred.secret"): @@ -99,8 +104,8 @@ for f in following: else: instance = instance.group(1) - if instance == "bofa.lol": - print("rest in piece bofa, skipping") + if instance in cfg['instance_blacklist']: + print("skipping blacklisted instance: {}".format(instance)) continue try: From 213c40351df5b6693ba2c80ad063f30bf2e20428 Mon Sep 17 00:00:00 2001 From: Lynne Date: Mon, 14 Jan 2019 08:38:54 +1000 Subject: [PATCH 16/22] only use 10k toots rather than all of them to avoid having Too Many Toots --- functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/functions.py b/functions.py index 6d88243..c21271a 100755 --- a/functions.py +++ b/functions.py @@ -16,7 +16,7 @@ def make_sentence(output): db = sqlite3.connect("toots-copy.db") db.text_factory=str c = db.cursor() - toots = c.execute("SELECT content FROM `toots`").fetchall() + toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall() toots_str = "" for toot in toots: toots_str += "\n{}".format(toot[0]) From 5d62b17310be70cadde1152decf785d7ae82609c Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 7 Feb 2019 10:53:23 +1000 Subject: [PATCH 17/22] removed hacky solution, added misskey support --- main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.py b/main.py index 6d3e921..45297e5 100755 --- a/main.py +++ b/main.py @@ -114,10 +114,10 @@ for f in following: uri = uri.format(uri = "{}@{}".format(f.username, instance)) r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10) j = r.json() - if len(j['aliases']) == 1: #TODO: this is a hack on top of a hack, fix it - uri = j['aliases'][0] - else: - uri = j['aliases'][1] + for link in j['links']: + if link['rel'] == 'self': + #this is a link formatted like "https://instan.ce/users/username", which is what we need + uri = link['href'] uri = "{}/outbox?page=true".format(uri) r = requests.get(uri, timeout=10) j = r.json() @@ -131,7 +131,7 @@ for f in following: pleroma = True j = j['first'] else: - print("Mastodon instance detected") + print("Mastodon/Misskey instance detected") uri = "{}&min_id={}".format(uri, last_toot) r = requests.get(uri) j = r.json() From 0d162b067bc3a5a347e401c3c786e9a7c6af185b Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 7 Feb 2019 10:58:27 +1000 Subject: [PATCH 18/22] webfinger is NOT a joke. --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 45297e5..2c52f98 100755 --- a/main.py +++ b/main.py @@ -97,7 +97,7 @@ for f in following: print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot)) #find the user's activitypub outbox - print("WebFingering...") + print("WebFingering... (do not laugh at this. WebFinger is a federated protocol. https://wikipedia.org/wiki/WebFinger)") instance = re.search(r"^.*@(.+)", f.acct) if instance == None: instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1) From 910af03aefb505c354767ce8f8638f23019e1930 Mon Sep 17 00:00:00 2001 From: Lucina Wilton Date: Thu, 7 Feb 2019 10:27:24 -0500 Subject: [PATCH 19/22] added vim swapfiles to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d0aabc5..505249e 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ toots.db-wal __pycache__/* .vscode/ .editorconfig +.*.swp From f71c4af0e61dbea26d9c7cb694c83a7c029dcdba Mon Sep 17 00:00:00 2001 From: Lucina Wilton Date: Thu, 7 Feb 2019 10:27:52 -0500 Subject: [PATCH 20/22] precompiled regexes used inside of loop --- main.py | 263 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 135 insertions(+), 128 deletions(-) diff --git a/main.py b/main.py index 2c52f98..bff3048 100755 --- a/main.py +++ b/main.py @@ -16,61 +16,61 @@ cfg = json.load(open('config.json', 'r')) #config.json *MUST* contain the instance URL, the instance blacklist (for dead/broken instances), and the CW text. if they're not provided, we'll fall back to defaults. if 'site' not in cfg: - cfg['website'] = "https://botsin.space" + cfg['website'] = "https://botsin.space" if 'cw' not in cfg: - cfg['cw'] = None + cfg['cw'] = None if 'instance_blacklist' not in cfg: - cfg["instance_blacklist"] = [ - "bofa.lol", - "witches.town" - ] + cfg["instance_blacklist"] = [ + "bofa.lol", + "witches.town" + ] #if the user is using a (very!) old version that still uses the .secret files, migrate to the new method if os.path.exists("clientcred.secret"): - print("Upgrading to new storage method") - cc = open("clientcred.secret").read().split("\n") - cfg['client'] = { - "id": cc[0], - "secret": cc[1] - } - cfg['secret'] = open("usercred.secret").read().rstrip("\n") - os.remove("clientcred.secret") - os.remove("usercred.secret") - + print("Upgrading to new storage method") + cc = open("clientcred.secret").read().split("\n") + cfg['client'] = { + "id": cc[0], + "secret": cc[1] + } + cfg['secret'] = open("usercred.secret").read().rstrip("\n") + os.remove("clientcred.secret") + os.remove("usercred.secret") + if "client" not in cfg: - print("No application info -- registering application with {}".format(cfg['site'])) - client_id, client_secret = Mastodon.create_app("mstdn-ebooks", - api_base_url=cfg['site'], - scopes=scopes, - website="https://github.com/Lynnesbian/mstdn-ebooks") + print("No application info -- registering application with {}".format(cfg['site'])) + client_id, client_secret = Mastodon.create_app("mstdn-ebooks", + api_base_url=cfg['site'], + scopes=scopes, + website="https://github.com/Lynnesbian/mstdn-ebooks") - cfg['client'] = { - "id": client_id, - "secret": client_secret - } + cfg['client'] = { + "id": client_id, + "secret": client_secret + } if "secret" not in cfg: - print("No user credentials -- logging in to {}".format(cfg['site'])) - client = Mastodon(client_id = cfg['client']['id'], - client_secret = cfg['client']['secret'], - api_base_url=cfg['site']) + print("No user credentials -- logging in to {}".format(cfg['site'])) + client = Mastodon(client_id = cfg['client']['id'], + client_secret = cfg['client']['secret'], + api_base_url=cfg['site']) - print("Open this URL and authenticate to give mstdn-ebooks access to your bot's account: {}".format(client.auth_request_url(scopes=scopes))) - cfg['secret'] = client.log_in(code=input("Secret: "), scopes=scopes) + print("Open this URL and authenticate to give mstdn-ebooks access to your bot's account: {}".format(client.auth_request_url(scopes=scopes))) + cfg['secret'] = client.log_in(code=input("Secret: "), scopes=scopes) json.dump(cfg, open("config.json", "w+")) def extract_toot(toot): - toot = functions.extract_toot(toot) - toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning - return(toot) + toot = functions.extract_toot(toot) + toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning + return(toot) client = Mastodon( - client_id=cfg['client']['id'], - client_secret = cfg['client']['secret'], - access_token=cfg['secret'], - api_base_url=cfg['site']) + client_id=cfg['client']['id'], + client_secret = cfg['client']['secret'], + access_token=cfg['secret'], + api_base_url=cfg['site']) me = client.account_verify_credentials() following = client.account_following(me.id) @@ -82,108 +82,115 @@ c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KE db.commit() def handleCtrlC(signal, frame): - print("\nPREMATURE EVACUATION - Saving chunks") - db.commit() - sys.exit(1) + print("\nPREMATURE EVACUATION - Saving chunks") + db.commit() + sys.exit(1) signal.signal(signal.SIGINT, handleCtrlC) +patterns = { + "handle": re.compile(r"^.*@(.+)"), + "url": re.compile(r"https?:\/\/(.*)"), + "uri": re.compile(r'template="([^"]+)"'), + "pid": re.compile(r"[^\/]+$"), +} + for f in following: - last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone() - if last_toot != None: - last_toot = last_toot[0] - else: - last_toot = 0 - print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot)) + last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone() + if last_toot != None: + last_toot = last_toot[0] + else: + last_toot = 0 + print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot)) - #find the user's activitypub outbox - print("WebFingering... (do not laugh at this. WebFinger is a federated protocol. https://wikipedia.org/wiki/WebFinger)") - instance = re.search(r"^.*@(.+)", f.acct) - if instance == None: - instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1) - else: - instance = instance.group(1) + #find the user's activitypub outbox + print("WebFingering... (do not laugh at this. WebFinger is a federated protocol. https://wikipedia.org/wiki/WebFinger)") + instance = patterns["handle"].search(f.acct) + if instance == None: + instance = patterns["url"].search(cfg['site']).group(1) + else: + instance = instance.group(1) - if instance in cfg['instance_blacklist']: - print("skipping blacklisted instance: {}".format(instance)) - continue + if instance in cfg['instance_blacklist']: + print("skipping blacklisted instance: {}".format(instance)) + continue - try: - r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10) - uri = re.search(r'template="([^"]+)"', r.text).group(1) - uri = uri.format(uri = "{}@{}".format(f.username, instance)) - r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10) - j = r.json() - for link in j['links']: - if link['rel'] == 'self': - #this is a link formatted like "https://instan.ce/users/username", which is what we need - uri = link['href'] - uri = "{}/outbox?page=true".format(uri) - r = requests.get(uri, timeout=10) - j = r.json() - except Exception: - print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)") - sys.exit(1) + try: + r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10) + uri = patterns["uri"].search(r.text).group(1) + uri = uri.format(uri = "{}@{}".format(f.username, instance)) + r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10) + j = r.json() + for link in j['links']: + if link['rel'] == 'self': + #this is a link formatted like "https://instan.ce/users/username", which is what we need + uri = link['href'] + uri = "{}/outbox?page=true".format(uri) + r = requests.get(uri, timeout=10) + j = r.json() + except Exception: + print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)") + sys.exit(1) - pleroma = False - if 'first' in j and type(j['first']) != str: - print("Pleroma instance detected") - pleroma = True - j = j['first'] - else: - print("Mastodon/Misskey instance detected") - uri = "{}&min_id={}".format(uri, last_toot) - r = requests.get(uri) - j = r.json() + pleroma = False + if 'first' in j and type(j['first']) != str: + print("Pleroma instance detected") + pleroma = True + j = j['first'] + else: + print("Mastodon/Misskey instance detected") + uri = "{}&min_id={}".format(uri, last_toot) + r = requests.get(uri) + j = r.json() - print("Downloading and saving toots", end='', flush=True) - done = False - try: - while not done and len(j['orderedItems']) > 0: - for oi in j['orderedItems']: - if oi['type'] != "Create": - continue #this isn't a toot/post/status/whatever, it's a boost or a follow or some other activitypub thing. ignore - - # its a toost baby - content = oi['object']['content'] - if oi['object']['summary'] != None: - #don't download CW'd toots. if you want your bot to download and learn from CW'd toots, replace "continue" with "pass". (todo: add a config.json option for this) - continue - toot = extract_toot(content) - # print(toot) - try: - if pleroma: - if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0: - #we've caught up to the notices we've already downloaded, so we can stop now - #you might be wondering, "lynne, what if the instance ratelimits you after 40 posts, and they've made 60 since main.py was last run? wouldn't the bot miss 20 posts and never be able to see them?" to which i reply, "it's called mstdn-ebooks not fediverse-ebooks. pleroma support is an afterthought" - done = True - break - pid = re.search(r"[^\/]+$", oi['object']['id']).group(0) - c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", ( - pid, - f.id, - oi['object']['id'], - toot - ) - ) - pass - except: - pass #ignore any toots that don't successfully go into the DB - if not pleroma: - r = requests.get(j['prev'], timeout=15) - else: - r = requests.get(j['next'], timeout=15) - j = r.json() - print('.', end='', flush=True) - print(" Done!") - db.commit() - except: - print("Encountered an error! Saving toots to database and moving to next followed account.") - db.commit() + print("Downloading and saving toots", end='', flush=True) + done = False + try: + while not done and len(j['orderedItems']) > 0: + for oi in j['orderedItems']: + if oi['type'] != "Create": + continue #this isn't a toot/post/status/whatever, it's a boost or a follow or some other activitypub thing. ignore + + # its a toost baby + content = oi['object']['content'] + if oi['object']['summary'] != None: + #don't download CW'd toots. if you want your bot to download and learn from CW'd toots, replace "continue" with "pass". (todo: add a config.json option for this) + continue + toot = extract_toot(content) + # print(toot) + try: + if pleroma: + if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0: + #we've caught up to the notices we've already downloaded, so we can stop now + #you might be wondering, "lynne, what if the instance ratelimits you after 40 posts, and they've made 60 since main.py was last run? wouldn't the bot miss 20 posts and never be able to see them?" to which i reply, "it's called mstdn-ebooks not fediverse-ebooks. pleroma support is an afterthought" + done = True + break + pid = patterns["pid"].search(oi['object']['id']).group(0) + c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", ( + pid, + f.id, + oi['object']['id'], + toot + ) + ) + pass + except: + pass #ignore any toots that don't successfully go into the DB + if not pleroma: + r = requests.get(j['prev'], timeout=15) + else: + r = requests.get(j['next'], timeout=15) + j = r.json() + print('.', end='', flush=True) + print(" Done!") + db.commit() + except: + print("Encountered an error! Saving toots to database and moving to next followed account.") + db.commit() print("Done!") db.commit() db.execute("VACUUM") #compact db db.commit() -db.close() \ No newline at end of file +db.close() From 20c3df5393c345ef0cc6eff7764a9fe81e8bb4c8 Mon Sep 17 00:00:00 2001 From: Lucina Wilton Date: Thu, 7 Feb 2019 10:45:44 -0500 Subject: [PATCH 21/22] fixed tabs i think hopefully --- main.py | 262 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 131 insertions(+), 131 deletions(-) diff --git a/main.py b/main.py index bff3048..df10d19 100755 --- a/main.py +++ b/main.py @@ -16,61 +16,61 @@ cfg = json.load(open('config.json', 'r')) #config.json *MUST* contain the instance URL, the instance blacklist (for dead/broken instances), and the CW text. if they're not provided, we'll fall back to defaults. if 'site' not in cfg: - cfg['website'] = "https://botsin.space" + cfg['website'] = "https://botsin.space" if 'cw' not in cfg: - cfg['cw'] = None + cfg['cw'] = None if 'instance_blacklist' not in cfg: - cfg["instance_blacklist"] = [ - "bofa.lol", - "witches.town" - ] + cfg["instance_blacklist"] = [ + "bofa.lol", + "witches.town" + ] #if the user is using a (very!) old version that still uses the .secret files, migrate to the new method if os.path.exists("clientcred.secret"): - print("Upgrading to new storage method") - cc = open("clientcred.secret").read().split("\n") - cfg['client'] = { - "id": cc[0], - "secret": cc[1] - } - cfg['secret'] = open("usercred.secret").read().rstrip("\n") - os.remove("clientcred.secret") - os.remove("usercred.secret") - + print("Upgrading to new storage method") + cc = open("clientcred.secret").read().split("\n") + cfg['client'] = { + "id": cc[0], + "secret": cc[1] + } + cfg['secret'] = open("usercred.secret").read().rstrip("\n") + os.remove("clientcred.secret") + os.remove("usercred.secret") + if "client" not in cfg: - print("No application info -- registering application with {}".format(cfg['site'])) - client_id, client_secret = Mastodon.create_app("mstdn-ebooks", - api_base_url=cfg['site'], - scopes=scopes, - website="https://github.com/Lynnesbian/mstdn-ebooks") + print("No application info -- registering application with {}".format(cfg['site'])) + client_id, client_secret = Mastodon.create_app("mstdn-ebooks", + api_base_url=cfg['site'], + scopes=scopes, + website="https://github.com/Lynnesbian/mstdn-ebooks") - cfg['client'] = { - "id": client_id, - "secret": client_secret - } + cfg['client'] = { + "id": client_id, + "secret": client_secret + } if "secret" not in cfg: - print("No user credentials -- logging in to {}".format(cfg['site'])) - client = Mastodon(client_id = cfg['client']['id'], - client_secret = cfg['client']['secret'], - api_base_url=cfg['site']) + print("No user credentials -- logging in to {}".format(cfg['site'])) + client = Mastodon(client_id = cfg['client']['id'], + client_secret = cfg['client']['secret'], + api_base_url=cfg['site']) - print("Open this URL and authenticate to give mstdn-ebooks access to your bot's account: {}".format(client.auth_request_url(scopes=scopes))) - cfg['secret'] = client.log_in(code=input("Secret: "), scopes=scopes) + print("Open this URL and authenticate to give mstdn-ebooks access to your bot's account: {}".format(client.auth_request_url(scopes=scopes))) + cfg['secret'] = client.log_in(code=input("Secret: "), scopes=scopes) json.dump(cfg, open("config.json", "w+")) def extract_toot(toot): - toot = functions.extract_toot(toot) - toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning - return(toot) + toot = functions.extract_toot(toot) + toot = toot.replace("@", "@\u200B") #put a zws between @ and username to avoid mentioning + return(toot) client = Mastodon( - client_id=cfg['client']['id'], - client_secret = cfg['client']['secret'], - access_token=cfg['secret'], - api_base_url=cfg['site']) + client_id=cfg['client']['id'], + client_secret = cfg['client']['secret'], + access_token=cfg['secret'], + api_base_url=cfg['site']) me = client.account_verify_credentials() following = client.account_following(me.id) @@ -82,111 +82,111 @@ c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KE db.commit() def handleCtrlC(signal, frame): - print("\nPREMATURE EVACUATION - Saving chunks") - db.commit() - sys.exit(1) + print("\nPREMATURE EVACUATION - Saving chunks") + db.commit() + sys.exit(1) signal.signal(signal.SIGINT, handleCtrlC) patterns = { - "handle": re.compile(r"^.*@(.+)"), - "url": re.compile(r"https?:\/\/(.*)"), - "uri": re.compile(r'template="([^"]+)"'), - "pid": re.compile(r"[^\/]+$"), + "handle": re.compile(r"^.*@(.+)"), + "url": re.compile(r"https?:\/\/(.*)"), + "uri": re.compile(r'template="([^"]+)"'), + "pid": re.compile(r"[^\/]+$"), } for f in following: - last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone() - if last_toot != None: - last_toot = last_toot[0] - else: - last_toot = 0 - print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot)) + last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone() + if last_toot != None: + last_toot = last_toot[0] + else: + last_toot = 0 + print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot)) - #find the user's activitypub outbox - print("WebFingering... (do not laugh at this. WebFinger is a federated protocol. https://wikipedia.org/wiki/WebFinger)") - instance = patterns["handle"].search(f.acct) - if instance == None: - instance = patterns["url"].search(cfg['site']).group(1) - else: - instance = instance.group(1) + #find the user's activitypub outbox + print("WebFingering... (do not laugh at this. WebFinger is a federated protocol. https://wikipedia.org/wiki/WebFinger)") + instance = patterns["handle"].search(f.acct) + if instance == None: + instance = patterns["url"].search(cfg['site']).group(1) + else: + instance = instance.group(1) - if instance in cfg['instance_blacklist']: - print("skipping blacklisted instance: {}".format(instance)) - continue + if instance in cfg['instance_blacklist']: + print("skipping blacklisted instance: {}".format(instance)) + continue - try: - r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10) - uri = patterns["uri"].search(r.text).group(1) - uri = uri.format(uri = "{}@{}".format(f.username, instance)) - r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10) - j = r.json() - for link in j['links']: - if link['rel'] == 'self': - #this is a link formatted like "https://instan.ce/users/username", which is what we need - uri = link['href'] - uri = "{}/outbox?page=true".format(uri) - r = requests.get(uri, timeout=10) - j = r.json() - except Exception: - print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)") - sys.exit(1) + try: + r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10) + uri = patterns["uri"].search(r.text).group(1) + uri = uri.format(uri = "{}@{}".format(f.username, instance)) + r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10) + j = r.json() + for link in j['links']: + if link['rel'] == 'self': + #this is a link formatted like "https://instan.ce/users/username", which is what we need + uri = link['href'] + uri = "{}/outbox?page=true".format(uri) + r = requests.get(uri, timeout=10) + j = r.json() + except Exception: + print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)") + sys.exit(1) - pleroma = False - if 'first' in j and type(j['first']) != str: - print("Pleroma instance detected") - pleroma = True - j = j['first'] - else: - print("Mastodon/Misskey instance detected") - uri = "{}&min_id={}".format(uri, last_toot) - r = requests.get(uri) - j = r.json() + pleroma = False + if 'first' in j and type(j['first']) != str: + print("Pleroma instance detected") + pleroma = True + j = j['first'] + else: + print("Mastodon/Misskey instance detected") + uri = "{}&min_id={}".format(uri, last_toot) + r = requests.get(uri) + j = r.json() - print("Downloading and saving toots", end='', flush=True) - done = False - try: - while not done and len(j['orderedItems']) > 0: - for oi in j['orderedItems']: - if oi['type'] != "Create": - continue #this isn't a toot/post/status/whatever, it's a boost or a follow or some other activitypub thing. ignore - - # its a toost baby - content = oi['object']['content'] - if oi['object']['summary'] != None: - #don't download CW'd toots. if you want your bot to download and learn from CW'd toots, replace "continue" with "pass". (todo: add a config.json option for this) - continue - toot = extract_toot(content) - # print(toot) - try: - if pleroma: - if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0: - #we've caught up to the notices we've already downloaded, so we can stop now - #you might be wondering, "lynne, what if the instance ratelimits you after 40 posts, and they've made 60 since main.py was last run? wouldn't the bot miss 20 posts and never be able to see them?" to which i reply, "it's called mstdn-ebooks not fediverse-ebooks. pleroma support is an afterthought" - done = True - break - pid = patterns["pid"].search(oi['object']['id']).group(0) - c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", ( - pid, - f.id, - oi['object']['id'], - toot - ) - ) - pass - except: - pass #ignore any toots that don't successfully go into the DB - if not pleroma: - r = requests.get(j['prev'], timeout=15) - else: - r = requests.get(j['next'], timeout=15) - j = r.json() - print('.', end='', flush=True) - print(" Done!") - db.commit() - except: - print("Encountered an error! Saving toots to database and moving to next followed account.") - db.commit() + print("Downloading and saving toots", end='', flush=True) + done = False + try: + while not done and len(j['orderedItems']) > 0: + for oi in j['orderedItems']: + if oi['type'] != "Create": + continue #this isn't a toot/post/status/whatever, it's a boost or a follow or some other activitypub thing. ignore + + # its a toost baby + content = oi['object']['content'] + if oi['object']['summary'] != None: + #don't download CW'd toots. if you want your bot to download and learn from CW'd toots, replace "continue" with "pass". (todo: add a config.json option for this) + continue + toot = extract_toot(content) + # print(toot) + try: + if pleroma: + if c.execute("SELECT COUNT(*) FROM toots WHERE id LIKE ?", (oi['object']['id'],)).fetchone()[0] > 0: + #we've caught up to the notices we've already downloaded, so we can stop now + #you might be wondering, "lynne, what if the instance ratelimits you after 40 posts, and they've made 60 since main.py was last run? wouldn't the bot miss 20 posts and never be able to see them?" to which i reply, "it's called mstdn-ebooks not fediverse-ebooks. pleroma support is an afterthought" + done = True + break + pid = patterns["pid"].search(oi['object']['id']).group(0) + c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", ( + pid, + f.id, + oi['object']['id'], + toot + ) + ) + pass + except: + pass #ignore any toots that don't successfully go into the DB + if not pleroma: + r = requests.get(j['prev'], timeout=15) + else: + r = requests.get(j['next'], timeout=15) + j = r.json() + print('.', end='', flush=True) + print(" Done!") + db.commit() + except: + print("Encountered an error! Saving toots to database and moving to next followed account.") + db.commit() print("Done!") From 4438d797e775ce0d95b6cd22fea69ae899567c9a Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 21 Feb 2019 13:37:45 +1000 Subject: [PATCH 22/22] use config.sample.json if config.json doesn't exist --- main.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 2c52f98..5b7e100 100755 --- a/main.py +++ b/main.py @@ -7,12 +7,16 @@ from mastodon import Mastodon from os import path from bs4 import BeautifulSoup -import os, sqlite3, signal, sys, json, re +import os, sqlite3, signal, sys, json, re, shutil import requests import functions scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses", "read:notifications"] -cfg = json.load(open('config.json', 'r')) +try: + cfg = json.load(open('config.json', 'r')) +except: + shutil.copy2("config.sample.json", "config.json") + cfg = json.load(open('config.json', 'r')) #config.json *MUST* contain the instance URL, the instance blacklist (for dead/broken instances), and the CW text. if they're not provided, we'll fall back to defaults. if 'site' not in cfg: @@ -186,4 +190,4 @@ print("Done!") db.commit() db.execute("VACUUM") #compact db db.commit() -db.close() \ No newline at end of file +db.close()