newlines work
also did a bunch of work for future silly stuff
parent d9dbd654ca
commit a08681e737
2 changed files with 40 additions and 15 deletions
gen.py (4 lines changed)

@@ -14,8 +14,8 @@ with open("corpus.txt") as fp:
     model = markovify.NewlineText(fp.read())
 
 print("tooting")
 # This is not the best long term fix tbh
 sentence = None
 # you will make that damn sentence
 while sentence is None:
     sentence = model.make_sentence(tries=100000)
-client.toot(sentence.replace(chr(31), "\n"))
+client.toot(sentence.replace("\0", "\n"))
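The one-character swap above (chr(31) to "\0") is what the commit title is about: markovify's NewlineText treats every corpus line as one sentence, so linebreaks inside a toot have to ride through the model as a sentinel character and get restored just before posting. A minimal round-trip sketch; the miniature corpus here is invented for illustration:

import markovify

# Hypothetical corpus: one toot per line, with "\0" standing in for
# linebreaks *inside* a toot so NewlineText keeps them in one sentence.
corpus = (
    "good morning\0i am once again awake\n"
    "good morning\0i am still asleep\n"
    "i am a bot and i am awake\n"
)

model = markovify.NewlineText(corpus)

# test_output=False skips markovify's originality check, which a corpus
# this small would never pass; gen.py instead just retries until it
# gets a sentence.
sentence = model.make_sentence(tries=100000, test_output=False)

if sentence is not None:
    # Undo the sentinel encoding right before posting, as gen.py does.
    print(sentence.replace("\0", "\n"))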
main.py (51 lines changed)

@@ -1,6 +1,7 @@
 from mastodon import Mastodon
 from getpass import getpass
 from os import path
+from bs4 import BeautifulSoup
 import json
 import re
 
@@ -17,25 +18,49 @@ if not path.exists("usercred.secret"):
     client = Mastodon(client_id="clientcred.secret", api_base_url=api_base_url)
     client.log_in(email, password, to_file="usercred.secret")
 
-def remove_tags(text):
-    text = text.strip().replace("<br>", chr(31))
-    TAG_RE = re.compile(r'<[^>]+>')
-    next_re = TAG_RE.sub('', text)
-    last = re.sub(r"(?:\@|https?://)\S+", "", next_re)
-    if len(last) > 0:
-        if last[0] == " ":
-            last = last[1:]
-    else:
-        last = ""
-    return last
+def parse_toot(toot):
+    soup = BeautifulSoup(toot.content, "html.parser")
+    if toot.spoiler_text != "": return
+    if toot.reblog is not None: return
+    if toot.visibility not in ["public", "unlisted"]: return
+
+    # remove all mentions
+    for mention in soup.select("span"):
+        mention.decompose()
+
+    # make all linebreaks actual linebreaks
+    for lb in soup.select("br"):
+        lb.insert_after("\n")
+        lb.decompose()
+
+    # put each p element on its own line because sometimes they decide not to be
+    for p in soup.select("p"):
+        p.insert_after("\n")
+        p.unwrap()
+
+    # unwrap all links (i like the bots posting links)
+    links = []
+    for link in soup.select("a"):
+        links += [link["href"]]
+        link.decompose()
+
+    text = map(lambda a: a.strip(), soup.get_text().strip().split("\n"))
+
+    mentions = [mention.acct for mention in toot.mentions]
+
+    # next up: store this and patch markovify to take it
+    # return {"text": text, "mentions": mentions, "links": links}
+    # it's 4am though so we're not doing that now, but i still want the parser updates
+    return "\0".join(list(text) + links)
+
 def get_toots(client, id):
     i = 0
     toots = client.account_statuses(id)
     while toots is not None:
         for toot in toots:
-            if toot.spoiler_text == "" and toot.reblog is None and toot.visibility in ["public", "unlisted"]:
-                yield remove_tags(toot.content)
+            t = parse_toot(toot)
+            if t != None:
+                yield t
         toots = client.fetch_next(toots)
         i += 1
         if i%10 == 0:
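parse_toot only reads a handful of attributes off the status object, so it can be exercised without a live client. A sketch under that assumption, faking a Mastodon.py status with SimpleNamespace; the HTML is invented, but shaped like what Mastodon serves:

from types import SimpleNamespace

# Stand-in for a Mastodon.py status: .content is the server-rendered HTML,
# .mentions holds objects with .acct, and the filter fields are set to pass.
fake_toot = SimpleNamespace(
    content='<p><span class="h-card">@friend</span> hello<br>world '
            '<a href="https://example.com">link</a></p>',
    spoiler_text="",
    reblog=None,
    visibility="public",
    mentions=[SimpleNamespace(acct="friend@example.social")],
)

# With parse_toot from the diff above in scope:
print(repr(parse_toot(fake_toot)))
# -> 'hello\x00world\x00https://example.com'
# the mention span is gone, the <br> became a corpus-internal linebreak,
# and the link URL rides along after its own "\0" separator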
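What the commit stops short of is the glue that writes the corpus gen.py reads. Presumably it looks something like the sketch below; the account id is a placeholder and the one-toot-per-line layout is inferred from markovify.NewlineText, only the filename comes from gen.py:

# Hypothetical corpus builder: one parsed toot per line, which is the
# shape markovify.NewlineText expects. 1234 stands in for the real
# account id.
with open("corpus.txt", "w") as fp:
    for toot in get_toots(client, 1234):
        fp.write(toot + "\n")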