now doesn't heck up hashtags
also keeps links in the right place
This commit is contained in:
parent
a08681e737
commit
f0a8816305
1 changed file with 19 additions and 9 deletions
28
main.py
28
main.py
|
@ -19,13 +19,22 @@ if not path.exists("usercred.secret"):
|
||||||
client.log_in(email, password, to_file="usercred.secret")
|
client.log_in(email, password, to_file="usercred.secret")
|
||||||
|
|
||||||
def parse_toot(toot):
|
def parse_toot(toot):
|
||||||
soup = BeautifulSoup(toot.content, "html.parser")
|
|
||||||
if toot.spoiler_text != "": return
|
if toot.spoiler_text != "": return
|
||||||
if toot.reblog is not None: return
|
if toot.reblog is not None: return
|
||||||
if toot.visibility not in ["public", "unlisted"]: return
|
if toot.visibility not in ["public", "unlisted"]: return
|
||||||
|
|
||||||
|
soup = BeautifulSoup(toot.content, "html.parser")
|
||||||
|
|
||||||
# remove all mentions
|
# pull the mentions out
|
||||||
for mention in soup.select("span"):
|
# for mention in soup.select("span.h-card"):
|
||||||
|
# mention.unwrap()
|
||||||
|
|
||||||
|
# for mention in soup.select("a.u-url.mention"):
|
||||||
|
# mention.unwrap()
|
||||||
|
|
||||||
|
# we will destroy the mentions until we're ready to use them
|
||||||
|
# someday turbocat, you will talk to your siblings
|
||||||
|
for mention in soup.select("span.h-card"):
|
||||||
mention.decompose()
|
mention.decompose()
|
||||||
|
|
||||||
# make all linebreaks actual linebreaks
|
# make all linebreaks actual linebreaks
|
||||||
|
@ -33,25 +42,26 @@ def parse_toot(toot):
|
||||||
lb.insert_after("\n")
|
lb.insert_after("\n")
|
||||||
lb.decompose()
|
lb.decompose()
|
||||||
|
|
||||||
# put each p element its own line because sometimes they decide not to be
|
# make each p element its own line because sometimes they decide not to be
|
||||||
for p in soup.select("p"):
|
for p in soup.select("p"):
|
||||||
p.insert_after("\n")
|
p.insert_after("\n")
|
||||||
p.unwrap()
|
p.unwrap()
|
||||||
|
|
||||||
|
# keep hashtags in the toots
|
||||||
|
for ht in soup.select("a.hashtag"):
|
||||||
|
ht.unwrap()
|
||||||
|
|
||||||
# unwrap all links (i like the bots posting links)
|
# unwrap all links (i like the bots posting links)
|
||||||
links = []
|
|
||||||
for link in soup.select("a"):
|
for link in soup.select("a"):
|
||||||
links += [link["href"]]
|
link.insert_after(link["href"])
|
||||||
link.decompose()
|
link.decompose()
|
||||||
|
|
||||||
text = map(lambda a: a.strip(), soup.get_text().strip().split("\n"))
|
text = map(lambda a: a.strip(), soup.get_text().strip().split("\n"))
|
||||||
|
|
||||||
mentions = [mention.acct for mention in toot.mentions]
|
|
||||||
|
|
||||||
# next up: store this and patch markovify to take it
|
# next up: store this and patch markovify to take it
|
||||||
# return {"text": text, "mentions": mentions, "links": links}
|
# return {"text": text, "mentions": mentions, "links": links}
|
||||||
# it's 4am though so we're not doing that now, but i still want the parser updates
|
# it's 4am though so we're not doing that now, but i still want the parser updates
|
||||||
return "\0".join(list(text) + links)
|
return "\0".join(list(text))
|
||||||
|
|
||||||
def get_toots(client, id):
|
def get_toots(client, id):
|
||||||
i = 0
|
i = 0
|
||||||
|
|
Loading…
Reference in a new issue