newlines work

also did a bunch of work for future silly stuff
This commit is contained in:
Jess 3Jane 2018-02-21 04:06:35 -05:00
parent d9dbd654ca
commit a08681e737
2 changed files with 40 additions and 15 deletions

4
gen.py
View file

@@ -14,8 +14,8 @@ with open("corpus.txt") as fp:
model = markovify.NewlineText(fp.read()) model = markovify.NewlineText(fp.read())
print("tooting") print("tooting")
# This is not the best long term fix tbh
sentence = None sentence = None
# you will make that damn sentence
while sentence is None: while sentence is None:
sentence = model.make_sentence(tries=100000) sentence = model.make_sentence(tries=100000)
client.toot(sentence.replace(chr(31), "\n")) client.toot(sentence.replace("\0", "\n"))

51
main.py
View file

@@ -1,6 +1,7 @@
from mastodon import Mastodon from mastodon import Mastodon
from getpass import getpass from getpass import getpass
from os import path from os import path
from bs4 import BeautifulSoup
import json import json
import re import re
@@ -17,25 +18,49 @@ if not path.exists("usercred.secret"):
client = Mastodon(client_id="clientcred.secret", api_base_url=api_base_url) client = Mastodon(client_id="clientcred.secret", api_base_url=api_base_url)
client.log_in(email, password, to_file="usercred.secret") client.log_in(email, password, to_file="usercred.secret")
def remove_tags(text): def parse_toot(toot):
text = text.strip().replace("<br>", chr(31)) soup = BeautifulSoup(toot.content, "html.parser")
TAG_RE = re.compile(r'<[^>]+>') if toot.spoiler_text != "": return
next_re = TAG_RE.sub('', text) if toot.reblog is not None: return
last = re.sub(r"(?:\@|https?\"//)\S+", "", next_re) if toot.visibility not in ["public", "unlisted"]: return
if len(last) > 0:
if last[0] == " ": # remove all mentions
last = last[1:] for mention in soup.select("span"):
else: mention.decompose()
last = ""
return last # make all linebreaks actual linebreaks
for lb in soup.select("br"):
lb.insert_after("\n")
lb.decompose()
# put each p element its own line because sometimes they decide not to be
for p in soup.select("p"):
p.insert_after("\n")
p.unwrap()
# unwrap all links (i like the bots posting links)
links = []
for link in soup.select("a"):
links += [link["href"]]
link.decompose()
text = map(lambda a: a.strip(), soup.get_text().strip().split("\n"))
mentions = [mention.acct for mention in toot.mentions]
# next up: store this and patch markovify to take it
# return {"text": text, "mentions": mentions, "links": links}
# it's 4am though so we're not doing that now, but i still want the parser updates
return "\0".join(list(text) + links)
def get_toots(client, id): def get_toots(client, id):
i = 0 i = 0
toots = client.account_statuses(id) toots = client.account_statuses(id)
while toots is not None: while toots is not None:
for toot in toots: for toot in toots:
if toot.spoiler_text == "" and toot.reblog is None and toot.visibility in ["public", "unlisted"]: t = parse_toot(toot)
yield remove_tags(toot.content) if t != None:
yield t
toots = client.fetch_next(toots) toots = client.fetch_next(toots)
i += 1 i += 1
if i%10 == 0: if i%10 == 0: