newlines work

also did a bunch of work for future silly stuff
This commit is contained in:
Jess 3Jane 2018-02-21 04:06:35 -05:00
parent d9dbd654ca
commit a08681e737
2 changed files with 40 additions and 15 deletions

4
gen.py
View file

@ -14,8 +14,8 @@ with open("corpus.txt") as fp:
model = markovify.NewlineText(fp.read())
print("tooting")
# This is not the best long term fix tbh
sentence = None
# you will make that damn sentence
while sentence is None:
sentence = model.make_sentence(tries=100000)
client.toot(sentence.replace(chr(31), "\n"))
client.toot(sentence.replace("\0", "\n"))

51
main.py
View file

@ -1,6 +1,7 @@
from mastodon import Mastodon
from getpass import getpass
from os import path
from bs4 import BeautifulSoup
import json
import re
@ -17,25 +18,49 @@ if not path.exists("usercred.secret"):
client = Mastodon(client_id="clientcred.secret", api_base_url=api_base_url)
client.log_in(email, password, to_file="usercred.secret")
def remove_tags(text):
    """Strip HTML tags, @mentions, and URLs from raw toot HTML.

    ``<br>`` elements are first converted to the unit-separator
    character (``chr(31)``) so line breaks survive the tag-stripping
    pass.  Returns ``""`` when nothing remains after cleaning.
    """
    # Preserve line breaks before the tag regex eats the <br> elements.
    text = text.strip().replace("<br>", chr(31))
    TAG_RE = re.compile(r'<[^>]+>')
    next_re = TAG_RE.sub('', text)
    # Drop @mentions and http(s) links.
    # BUG FIX: the scheme separator was mistyped — `https?\"//` matched
    # the literal text `http"//`, so real URLs were never removed.
    last = re.sub(r"(?:\@|https?://)\S+", "", next_re)
    if len(last) > 0:
        # A mention at the start leaves one leading space behind.
        if last[0] == " ":
            last = last[1:]
    else:
        last = ""
    return last
def parse_toot(toot):
    """Extract markov-ready text from a single toot.

    Returns the toot's text lines and link URLs joined on NUL ("\0",
    the separator the generator later splits on), or None for toots
    that should be skipped (CW'd, boosts, non-public/unlisted).
    """
    # Cheap attribute guards first — no point parsing HTML for a toot
    # we are going to throw away anyway.
    if toot.spoiler_text != "": return
    if toot.reblog is not None: return
    if toot.visibility not in ["public", "unlisted"]: return
    soup = BeautifulSoup(toot.content, "html.parser")
    # remove all mentions (Mastodon wraps them in <span> elements)
    for mention in soup.select("span"):
        mention.decompose()
    # make all linebreaks actual linebreaks
    for lb in soup.select("br"):
        lb.insert_after("\n")
        lb.decompose()
    # put each p element on its own line because sometimes they decide not to be
    for p in soup.select("p"):
        p.insert_after("\n")
        p.unwrap()
    # unwrap all links (i like the bots posting links)
    links = []
    for link in soup.select("a"):
        links.append(link["href"])
        link.decompose()
    text = map(lambda a: a.strip(), soup.get_text().strip().split("\n"))
    # NOTE: `mentions` is unused for now; kept for the planned structured
    # return below.
    mentions = [mention.acct for mention in toot.mentions]
    # next up: store this and patch markovify to take it
    # return {"text": text, "mentions": mentions, "links": links}
    # it's 4am though so we're not doing that now, but i still want the parser updates
    return "\0".join(list(text) + links)
def get_toots(client, id):
i = 0
toots = client.account_statuses(id)
while toots is not None:
for toot in toots:
if toot.spoiler_text == "" and toot.reblog is None and toot.visibility in ["public", "unlisted"]:
yield remove_tags(toot.content)
t = parse_toot(toot)
if t != None:
yield t
toots = client.fetch_next(toots)
i += 1
if i%10 == 0: