From acec5b6668f673485975a3d3436aa0c0900b4dd9 Mon Sep 17 00:00:00 2001 From: Lynne Date: Sat, 8 Sep 2018 23:06:17 +0000 Subject: [PATCH] Added unicode compatibility, better stdout info --- gen.py | 12 +++--- main.py | 124 ++++++++++++++++++++++++++++---------------------------- 2 files changed, 69 insertions(+), 67 deletions(-) diff --git a/gen.py b/gen.py index 3cf1b15..47b12d6 100644 --- a/gen.py +++ b/gen.py @@ -3,19 +3,21 @@ import json import time from mastodon import Mastodon -api_base_url = "https://botsin.space" +api_base_url = "https://botsin.space" #todo: this shouldn't be hardcoded client = Mastodon( client_id="clientcred.secret", access_token="usercred.secret", api_base_url=api_base_url) -with open("corpus.txt") as fp: - model = markovify.NewlineText(fp.read()) +with open("corpus.txt", encoding="utf-8") as fp: + model = markovify.NewlineText(fp.read()) print("tooting") sentence = None # you will make that damn sentence while sentence is None: - sentence = model.make_sentence(tries=100000) -client.toot(sentence.replace("\0", "\n")) + sentence = model.make_sentence(tries=100000) +toot = sentence.replace("\0", "\n") +client.toot(toot) +print("Created toot: {}".format(toot)) diff --git a/main.py b/main.py index b837e52..e9e72b7 100644 --- a/main.py +++ b/main.py @@ -7,84 +7,84 @@ import re api_base_url = "https://botsin.space" if not path.exists("clientcred.secret"): - print("No clientcred.secret, registering application") - Mastodon.create_app("ebooks", api_base_url=api_base_url, to_file="clientcred.secret") + print("No clientcred.secret, registering application") + Mastodon.create_app("ebooks", api_base_url=api_base_url, to_file="clientcred.secret") if not path.exists("usercred.secret"): - print("No usercred.secret, registering application") - email = input("Email: ") - password = getpass("Password: ") - client = Mastodon(client_id="clientcred.secret", api_base_url=api_base_url) - client.log_in(email, password, to_file="usercred.secret") + print("No usercred.secret, registering application") + email = input("Email: ") + password = getpass("Password: ") + client = Mastodon(client_id="clientcred.secret", api_base_url=api_base_url) + client.log_in(email, password, to_file="usercred.secret") def parse_toot(toot): - if toot.spoiler_text != "": return - if toot.reblog is not None: return - if toot.visibility not in ["public", "unlisted"]: return + if toot.spoiler_text != "": return + if toot.reblog is not None: return + if toot.visibility not in ["public", "unlisted"]: return - soup = BeautifulSoup(toot.content, "html.parser") - - # pull the mentions out - # for mention in soup.select("span.h-card"): - # mention.unwrap() + soup = BeautifulSoup(toot.content, "html.parser") + + # pull the mentions out + # for mention in soup.select("span.h-card"): + # mention.unwrap() - # for mention in soup.select("a.u-url.mention"): - # mention.unwrap() + # for mention in soup.select("a.u-url.mention"): + # mention.unwrap() - # we will destroy the mentions until we're ready to use them - # someday turbocat, you will talk to your sibilings - for mention in soup.select("span.h-card"): - mention.decompose() - - # make all linebreaks actual linebreaks - for lb in soup.select("br"): - lb.insert_after("\n") - lb.decompose() + # we will destroy the mentions until we're ready to use them + # someday turbocat, you will talk to your sibilings + for mention in soup.select("span.h-card"): + mention.decompose() + + # make all linebreaks actual linebreaks + for lb in soup.select("br"): + lb.insert_after("\n") + lb.decompose() - # make each p element its own line because sometimes they decide not to be - for p in soup.select("p"): - p.insert_after("\n") - p.unwrap() - - # keep hashtags in the toots - for ht in soup.select("a.hashtag"): - ht.unwrap() + # make each p element its own line because sometimes they decide not to be + for p in soup.select("p"): + p.insert_after("\n") + p.unwrap() + + # keep hashtags in the toots + for ht in soup.select("a.hashtag"): + ht.unwrap() - # unwrap all links (i like the bots posting links) - for link in soup.select("a"): - link.insert_after(link["href"]) - link.decompose() + # unwrap all links (i like the bots posting links) + for link in soup.select("a"): + link.insert_after(link["href"]) + link.decompose() - text = map(lambda a: a.strip(), soup.get_text().strip().split("\n")) + text = map(lambda a: a.strip(), soup.get_text().strip().split("\n")) - # next up: store this and patch markovify to take it - # return {"text": text, "mentions": mentions, "links": links} - # it's 4am though so we're not doing that now, but i still want the parser updates - return "\0".join(list(text)) + # next up: store this and patch markovify to take it + # return {"text": text, "mentions": mentions, "links": links} + # it's 4am though so we're not doing that now, but i still want the parser updates + return "\0".join(list(text)) def get_toots(client, id): - i = 0 - toots = client.account_statuses(id) - while toots is not None: - for toot in toots: - t = parse_toot(toot) - if t != None: - yield t - toots = client.fetch_next(toots) - i += 1 - if i%10 == 0: - print(i) + i = 0 + toots = client.account_statuses(id) + while toots is not None: + for toot in toots: + t = parse_toot(toot) + if t != None: + yield t + toots = client.fetch_next(toots) + i += 1 + if i%10 == 0: + print(i) client = Mastodon( - client_id="clientcred.secret", - access_token="usercred.secret", - api_base_url=api_base_url) + client_id="clientcred.secret", + access_token="usercred.secret", + api_base_url=api_base_url) me = client.account_verify_credentials() following = client.account_following(me.id) -with open("corpus.txt", "w+") as fp: - for f in following: - print(f.username) - for t in get_toots(client, f.id): - fp.write(t + "\n") +with open("corpus.txt", "w+", encoding="utf-8") as fp: + for f in following: + print("Downloading toots for user @{}".format(f.username)) + for t in get_toots(client, f.id): + fp.write(t + "\n")