newlines work
also did a bunch of work for future silly stuff
This commit is contained in:
parent
d9dbd654ca
commit
a08681e737
2 changed files with 40 additions and 15 deletions
4
gen.py
4
gen.py
|
@ -14,8 +14,8 @@ with open("corpus.txt") as fp:
|
||||||
model = markovify.NewlineText(fp.read())
|
model = markovify.NewlineText(fp.read())
|
||||||
|
|
||||||
print("tooting")
|
print("tooting")
|
||||||
# This is not the best long-term fix tbh
|
|
||||||
sentence = None
|
sentence = None
|
||||||
|
# you will make that damn sentence
|
||||||
while sentence is None:
|
while sentence is None:
|
||||||
sentence = model.make_sentence(tries=100000)
|
sentence = model.make_sentence(tries=100000)
|
||||||
client.toot(sentence.replace(chr(31), "\n"))
|
client.toot(sentence.replace("\0", "\n"))
|
||||||
|
|
51
main.py
51
main.py
|
@ -1,6 +1,7 @@
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
from getpass import getpass
|
from getpass import getpass
|
||||||
from os import path
|
from os import path
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
@ -17,25 +18,49 @@ if not path.exists("usercred.secret"):
|
||||||
client = Mastodon(client_id="clientcred.secret", api_base_url=api_base_url)
|
client = Mastodon(client_id="clientcred.secret", api_base_url=api_base_url)
|
||||||
client.log_in(email, password, to_file="usercred.secret")
|
client.log_in(email, password, to_file="usercred.secret")
|
||||||
|
|
||||||
def remove_tags(text):
|
def parse_toot(toot):
|
||||||
text = text.strip().replace("<br>", chr(31))
|
soup = BeautifulSoup(toot.content, "html.parser")
|
||||||
TAG_RE = re.compile(r'<[^>]+>')
|
if toot.spoiler_text != "": return
|
||||||
next_re = TAG_RE.sub('', text)
|
if toot.reblog is not None: return
|
||||||
last = re.sub(r"(?:\@|https?\"//)\S+", "", next_re)
|
if toot.visibility not in ["public", "unlisted"]: return
|
||||||
if len(last) > 0:
|
|
||||||
if last[0] == " ":
|
# remove all mentions
|
||||||
last = last[1:]
|
for mention in soup.select("span"):
|
||||||
else:
|
mention.decompose()
|
||||||
last = ""
|
|
||||||
return last
|
# make all linebreaks actual linebreaks
|
||||||
|
for lb in soup.select("br"):
|
||||||
|
lb.insert_after("\n")
|
||||||
|
lb.decompose()
|
||||||
|
|
||||||
|
# put each p element on its own line because sometimes they decide not to be
|
||||||
|
for p in soup.select("p"):
|
||||||
|
p.insert_after("\n")
|
||||||
|
p.unwrap()
|
||||||
|
|
||||||
|
# pull all links out of the text, keeping their hrefs (i like the bots posting links)
|
||||||
|
links = []
|
||||||
|
for link in soup.select("a"):
|
||||||
|
links += [link["href"]]
|
||||||
|
link.decompose()
|
||||||
|
|
||||||
|
text = map(lambda a: a.strip(), soup.get_text().strip().split("\n"))
|
||||||
|
|
||||||
|
mentions = [mention.acct for mention in toot.mentions]
|
||||||
|
|
||||||
|
# next up: store this and patch markovify to take it
|
||||||
|
# return {"text": text, "mentions": mentions, "links": links}
|
||||||
|
# it's 4am though so we're not doing that now, but i still want the parser updates
|
||||||
|
return "\0".join(list(text) + links)
|
||||||
|
|
||||||
def get_toots(client, id):
|
def get_toots(client, id):
|
||||||
i = 0
|
i = 0
|
||||||
toots = client.account_statuses(id)
|
toots = client.account_statuses(id)
|
||||||
while toots is not None:
|
while toots is not None:
|
||||||
for toot in toots:
|
for toot in toots:
|
||||||
if toot.spoiler_text == "" and toot.reblog is None and toot.visibility in ["public", "unlisted"]:
|
t = parse_toot(toot)
|
||||||
yield remove_tags(toot.content)
|
if t != None:
|
||||||
|
yield t
|
||||||
toots = client.fetch_next(toots)
|
toots = client.fetch_next(toots)
|
||||||
i += 1
|
i += 1
|
||||||
if i%10 == 0:
|
if i%10 == 0:
|
||||||
|
|
Loading…
Reference in a new issue