code cleanup, fixes #23

This commit is contained in:
Lynne 2019-05-19 23:06:31 +10:00
parent 5fcefd12b0
commit 354ea46dcb
No known key found for this signature in database
GPG key ID: FB7B970303ACE499
4 changed files with 55 additions and 41 deletions

View file

@ -5,7 +5,7 @@
import markovify import markovify
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import re, multiprocessing, sqlite3, shutil, os, json import re, multiprocessing, sqlite3, shutil, os, json, html
cfg = json.load(open('config.json')) cfg = json.load(open('config.json'))
@ -48,21 +48,18 @@ def make_toot(force_markov = False, args = None):
return make_toot_markov() return make_toot_markov()
def make_toot_markov(query = None): def make_toot_markov(query = None):
tries = 0
toot = None toot = None
while toot == None and tries < 10: #try to make a toot 10 times pin, pout = multiprocessing.Pipe(False)
pin, pout = multiprocessing.Pipe(False) p = multiprocessing.Process(target = make_sentence, args = [pout])
p = multiprocessing.Process(target = make_sentence, args = [pout]) p.start()
p.start() p.join(5) #wait 5 seconds to get something
p.join(10) #wait 10 seconds to get something if p.is_alive(): #if it's still trying to make a toot after 5 seconds
if p.is_alive(): #if it's still trying to make a toot after 10 seconds p.terminate()
p.terminate() p.join()
p.join() else:
toot = None toot = pin.recv()
tries = tries + 1 #give up, and increment tries by one
else: if toot == None:
toot = pin.recv()
if toot == None: #if we've tried and failed ten times, just give up
toot = "Toot generation failed! Contact Lynne (lynnesbian@fedi.lynnesbian.space) for assistance." toot = "Toot generation failed! Contact Lynne (lynnesbian@fedi.lynnesbian.space) for assistance."
return { return {
"toot": toot, "toot": toot,
@ -70,8 +67,7 @@ def make_toot_markov(query = None):
} }
def extract_toot(toot): def extract_toot(toot):
toot = toot.replace("&apos;", "'") #convert HTML stuff to normal stuff toot = html.unescape(toot) #convert HTML escape codes to text
toot = toot.replace("&quot;", '"') #ditto
soup = BeautifulSoup(toot, "html.parser") soup = BeautifulSoup(toot, "html.parser")
for lb in soup.select("br"): #replace <br> with linebreak for lb in soup.select("br"): #replace <br> with linebreak
lb.insert_after("\n") lb.insert_after("\n")

6
gen.py
View file

@ -20,8 +20,8 @@ client = None
if not args.simulate: if not args.simulate:
client = Mastodon( client = Mastodon(
client_id=cfg['client']['id'], client_id=cfg['client']['id'],
client_secret=cfg['client']['secret'], client_secret=cfg['client']['secret'],
access_token=cfg['secret'], access_token=cfg['secret'],
api_base_url=cfg['site']) api_base_url=cfg['site'])
if __name__ == '__main__': if __name__ == '__main__':
@ -36,7 +36,7 @@ if __name__ == '__main__':
client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = cfg['cw']) client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = cfg['cw'])
except Exception as err: except Exception as err:
toot = { toot = {
"toot": "An unknown error that should never happen occurred. Maybe it's because of the spoiler text, which is {}. If not, I have no idea what went wrong. This is an error message -- contact lynnesbian@fedi.lynnesbian.space for assistance.".format(cfg['cw']) "toot": "An error occurred while submitting the generated post. Contact lynnesbian@fedi.lynnesbian.space for assistance."
} }
client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!") client.status_post(toot['toot'], visibility = 'unlisted', spoiler_text = "Error!")
try: try:

45
main.py
View file

@ -107,7 +107,7 @@ for f in following:
last_toot = last_toot[0] last_toot = last_toot[0]
else: else:
last_toot = 0 last_toot = 0
print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot)) print("Downloading posts for user @{}, starting from {}".format(f.acct, last_toot))
#find the user's activitypub outbox #find the user's activitypub outbox
print("WebFingering...") print("WebFingering...")
@ -122,34 +122,43 @@ for f in following:
continue continue
try: try:
# 1. download host-meta to find webfinger URL
r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10) r = requests.get("https://{}/.well-known/host-meta".format(instance), timeout=10)
# 2. use webfinger to find user's info page
uri = patterns["uri"].search(r.text).group(1) uri = patterns["uri"].search(r.text).group(1)
uri = uri.format(uri = "{}@{}".format(f.username, instance)) uri = uri.format(uri = "{}@{}".format(f.username, instance))
r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10) r = requests.get(uri, headers={"Accept": "application/json"}, timeout=10)
j = r.json() j = r.json()
found = False
for link in j['links']: for link in j['links']:
if link['rel'] == 'self': if link['rel'] == 'self':
#this is a link formatted like "https://instan.ce/users/username", which is what we need #this is a link formatted like "https://instan.ce/users/username", which is what we need
uri = link['href'] uri = link['href']
found = True
break
if not found:
print("Couldn't find a valid ActivityPub outbox URL.")
# 3. download first page of outbox
uri = "{}/outbox?page=true".format(uri) uri = "{}/outbox?page=true".format(uri)
r = requests.get(uri, timeout=10) r = requests.get(uri, timeout=15)
j = r.json() j = r.json()
except Exception: except:
print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)") print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)")
sys.exit(1) sys.exit(1)
pleroma = False pleroma = False
if 'first' in j and type(j['first']) != str: if 'next' not in j:
print("Pleroma instance detected") print("Using Pleroma compatibility mode")
pleroma = True pleroma = True
j = j['first'] j = j['first']
else: else:
print("Mastodon/Misskey instance detected") print("Using standard mode")
uri = "{}&min_id={}".format(uri, last_toot) uri = "{}&min_id={}".format(uri, last_toot)
r = requests.get(uri) r = requests.get(uri)
j = r.json() j = r.json()
print("Downloading and saving toots", end='', flush=True) print("Downloading and saving posts", end='', flush=True)
done = False done = False
try: try:
while not done and len(j['orderedItems']) > 0: while not done and len(j['orderedItems']) > 0:
@ -169,7 +178,7 @@ for f in following:
done = True done = True
if cfg['lang']: if cfg['lang']:
try: try:
if oi['object']['contentMap'][cfg['lang']]: # filter for language if oi['object']['contentMap'][cfg['lang']]: # filter for language
insert_toot(oi, f, toot, c) insert_toot(oi, f, toot, c)
except KeyError: except KeyError:
#JSON doesn't have contentMap, just insert the toot irregardlessly #JSON doesn't have contentMap, just insert the toot irregardlessly
@ -179,10 +188,18 @@ for f in following:
pass pass
except: except:
pass #ignore any toots that don't successfully go into the DB pass #ignore any toots that don't successfully go into the DB
if not pleroma:
r = requests.get(j['prev'], timeout=15) # get the next/previous page
else: try:
r = requests.get(j['next'], timeout=15) if not pleroma:
r = requests.get(j['prev'], timeout=15)
else:
r = requests.get(j['next'], timeout=15)
except requests.Timeout:
print("HTTP timeout, site did not respond within 15 seconds")
except:
print("An error occurred while trying to obtain more posts.")
j = r.json() j = r.json()
print('.', end='', flush=True) print('.', end='', flush=True)
print(" Done!") print(" Done!")
@ -193,10 +210,10 @@ for f in following:
db.commit() db.commit()
else: else:
# TODO: remove duplicate code # TODO: remove duplicate code
print("Encountered an error! Saving toots to database and moving to next followed account.") print("Encountered an error! Saving posts to database and moving to next followed account.")
db.commit() db.commit()
except: except:
print("Encountered an error! Saving toots to database and moving to next followed account.") print("Encountered an error! Saving posts to database and moving to next followed account.")
db.commit() db.commit()
print("Done!") print("Done!")

View file

@ -4,7 +4,7 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/. # file, You can obtain one at http://mozilla.org/MPL/2.0/.
import mastodon import mastodon
import os, random, re, json import random, re, json
import functions import functions
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -13,8 +13,8 @@ threads = {}
client = mastodon.Mastodon( client = mastodon.Mastodon(
client_id=cfg['client']['id'], client_id=cfg['client']['id'],
client_secret=cfg['client']['secret'], client_secret=cfg['client']['secret'],
access_token=cfg['secret'], access_token=cfg['secret'],
api_base_url=cfg['site']) api_base_url=cfg['site'])
def extract_toot(toot): def extract_toot(toot):
@ -28,6 +28,7 @@ class ReplyListener(mastodon.StreamListener):
if notification['type'] == 'mention': #if we're mentioned: if notification['type'] == 'mention': #if we're mentioned:
acct = "@" + notification['account']['acct'] #get the account's @ acct = "@" + notification['account']['acct'] #get the account's @
post_id = notification['status']['id'] post_id = notification['status']['id']
# check if we've already been participating in this thread # check if we've already been participating in this thread
try: try:
context = client.status_context(post_id) context = client.status_context(post_id)
@ -39,10 +40,10 @@ class ReplyListener(mastodon.StreamListener):
for post in context['ancestors']: for post in context['ancestors']:
if post['account']['id'] == me: if post['account']['id'] == me:
posts += 1 posts += 1
if posts >= cfg['max_thread_length']: if posts >= cfg['max_thread_length']:
# stop replying # stop replying
print("didn't reply (max_thread_length exceeded)") print("didn't reply (max_thread_length exceeded)")
return return
mention = extract_toot(notification['status']['content']) mention = extract_toot(notification['status']['content'])
toot = functions.make_toot(True)['toot'] #generate a toot toot = functions.make_toot(True)['toot'] #generate a toot