Fix line-endings; Increase portability; Add speedtyper.py
@@ -1,47 +1,50 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
 import requests
 import json
 import sys
 
 if len(sys.argv) < 3:
     print("usage: python get_7k_pp.py [api key] [username]")
     exit(0)
 
 api_key = sys.argv[1]
 usernames = sys.argv[2:]
 
 def get_json(url):
     page = requests.get(url)
 
     try:
         page.raise_for_status()
     except:
         print("Could not get '{}'".format(url))
         return []
 
     return json.loads(page.text)
 
 def get_user_best(api_key, username):
     return get_json("https://osu.ppy.sh/api/get_user_best?k={}&limit=100&m=3&u={}".format(api_key, username))
 
 def get_beatmap(api_key, id):
     return get_json("https://osu.ppy.sh/api/get_beatmaps?k={}&b={}".format(api_key, id))
 
 for username in usernames:
     scores_7k = []
     pp_7k = 0.0
     for score in get_user_best(api_key, username):
         info = get_beatmap(api_key, score["beatmap_id"])[0]
         if info["diff_size"] == '7':
             # theres probably a prettier solution for this
             percentage = 100.0
             for num in range(len(scores_7k)):
                 percentage *= 0.95
 
             pp_7k += float(score["pp"]) / 100.0 * percentage
             scores_7k.append("[{:.2f}+ ({:.2f}%)] {:.2f} {} [{}] {}k".format(pp_7k, percentage, float(score["pp"]), info["title"], info["version"], score["score"][:3]))
 
     print("7k pp for '{}'".format(username))
     print("Total = {}".format(pp_7k))
 
     for score_7k in scores_7k:
         print(score_7k)
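
The inner loop above decays percentage by 0.95 once per already-collected score, which collapses to a closed form (the script's own comment asks for a prettier solution). A minimal sketch of the same weighting, not part of the commit; the helper name is illustrative:

def weighted_pp(pps):
    # n-th best score (0-indexed) contributes pp * 0.95**n,
    # exactly what the loop computes incrementally
    return sum(pp * 0.95 ** n for n, pp in enumerate(pps))

print(weighted_pp([100.0, 90.0, 80.0]))  # 100.0 + 85.5 + 72.2 = 257.7
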
@@ -1,34 +1,37 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
 import sys
 
 from selenium import webdriver
 
 def login(driver, username, password):
     driver.get("https://osu.ppy.sh/forum/ucp.php?mode=login")
     driver.find_element_by_name("username").send_keys(username)
     driver.find_element_by_name("password").send_keys(password)
     driver.find_element_by_name("login").click()
 
 def main(argv):
     if len(argv) < 5:
         print("Usage: {} [USERNAME] [PASSWORD] [SEARCH PARAMS] [NUM PAGES]".format(argv[0]))
         return
 
     username = argv[1]
     password = argv[2]
 
     search_params = argv[3].strip('"')
     num_pages = int(argv[4])
 
     with webdriver.Firefox() as driver:
         login(driver, username, password)
 
         for page_nr in range(1, num_pages + 1):
             if page_nr == 0:
                 continue
 
             driver.get("https://old.ppy.sh/p/beatmaplist?{}&page={}".format(search_params, page_nr))
             for beatmap_elem in driver.find_elements_by_class_name("beatmap"):
                 print(beatmap_elem.get_property("id"))
 
 if __name__ == "__main__":
     main(sys.argv)
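
Note that the find_element_by_name / find_elements_by_class_name helpers used here belong to Selenium 3 and were removed in Selenium 4. A sketch of the equivalent login with the By locator API, in case the script is run against a newer Selenium:

from selenium.webdriver.common.by import By

def login(driver, username, password):
    driver.get("https://osu.ppy.sh/forum/ucp.php?mode=login")
    driver.find_element(By.NAME, "username").send_keys(username)
    driver.find_element(By.NAME, "password").send_keys(password)
    driver.find_element(By.NAME, "login").click()
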
@@ -1,23 +1,24 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 
 import requests # http requests
 import bs4 # html parser
 
 with open("titles.txt", "w", encoding="UTF-8") as file:
     for index in range(1, 175):
         url = "https://www.gogdb.org/products?page=" + str(index)
         print(url)
 
         page = requests.get("https://www.gogdb.org/products?page=" + str(index))
         page.raise_for_status()
 
         soup = bs4.BeautifulSoup(page.text, "html.parser")
 
         producttable = soup.select("#product-table")[0]
         titles = producttable.select("tr")
         for title in titles:
             if len(title.select(".col-type")) == 0:
                 continue
 
             if title.select(".col-type")[0].text == 'Game':
                 file.write(title.select(".col-name")[0].text.strip() + '\n')
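
The loop builds url and then rebuilds the same string for the actual request. A small sketch, outside the commit, that reuses the variable and a shared requests.Session so the 174 page fetches can reuse one connection:

import requests

session = requests.Session()
for index in range(1, 175):
    url = "https://www.gogdb.org/products?page=" + str(index)
    page = session.get(url)   # reuse the string that was just built and printed
    page.raise_for_status()
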
@@ -1,3 +1,6 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
 import requests, bs4, time
 
 def get_titles(filename, title_type, maxrank):
@@ -1,137 +1,138 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 
 # reads SteamIDs from ./accounts.txt and outputs ban information into ./output.html
 
 import urllib.request
 import json
 import time
 
 steamapikey = ""
 
 # read file and remove trailing newline because we're making a list
 account_lines = [line.rstrip("\n") for line in open("accounts.txt").readlines()]
 
 ids = []
 for line in account_lines:
     # https://developer.valvesoftware.com/wiki/SteamID
     Z = int(line.split(':')[2])
     V = 0x0110000100000000 # profile ID constant
     Y = int(line.split(':')[1])
     W = Z * 2 + V + Y
     ids.append(str(W))
 
 # API takes in comma separated steamids
 ids_string = ",".join([x for x in ids])
 
 # https://developer.valvesoftware.com/wiki/Steam_Web_API
 summaries = json.load(urllib.request.urlopen("http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=" + steamapikey + "&steamids=" + ids_string))
 bans = json.load(urllib.request.urlopen("http://api.steampowered.com/ISteamUser/GetPlayerBans/v1/?key=" + steamapikey + "&steamids=" + ids_string))
 
 output_file = open("output.html", "w", encoding="utf-8")
 
 output_file.write('\
 <!DOCTYPE html>\n\
 <html>\n\
 <head>\n\
 <style>\n\
 body {\n\
 font-family: sans-serif;\n\
 }\n\
 \n\
 table {\n\
 color: #222;\n\
 border-collapse: collapse;\n\
 }\n\
 \n\
 tr, th, td {\n\
 border: 1px solid #a2a9b1;\n\
 padding: 0.2em 0.4em;\n\
 }\n\
 \n\
 .pwned {\n\
 background-color: #ffb6c1\n\
 }\n\
 \n\
 th {\n\
 background-color: #eaecf0;\n\
 text-align: center;\n\
 }\n\
 \n\
 a:hover, a:visited, a:link, a:active {\n\
 text-decoration: none;\n\
 }\n\
 </style>\n\
 </head>\n\
 \n\
 <body>\n\
 <table>\n\
 <tr>\n\
 <th>ID</th>\n\
 <th>Name</th>\n\
 <th>Status</th>\n\
 <th>Type</th>\n\
 <th>BanDays</th>\n\
 <th>LogDays</th>\n\
 <th>Profile</th>\n\
 </tr>\n\
 ')
 
 numbanned = 0
 
 for i in range(len(ids)):
     try:
         for summary in summaries['response']['players']:
             if summary['steamid'] == str(ids[i]):
                 break
     except:
         continue
 
     try:
         for ban in bans['players']:
             if ban['SteamId'] == str(ids[i]):
                 break
     except:
         continue
 
     status = ""
     bantype = ""
     bandays = ""
 
     if ban['VACBanned']:
         status = "Pwned"
         bantype = "VAC"
         bandays = str(ban['DaysSinceLastBan'])
         numbanned += 1
 
     if ban['NumberOfGameBans'] > 0:
         status = "Pwned"
         bantype = "Gameban"
         bandays = str(ban['DaysSinceLastBan'])
         numbanned += 1
 
     name = summary['personaname']
     name = name.replace("<", "&lt;") # escape html tag names
     name = name.replace(">", "&gt;")
 
     logdays = str(int((time.time() - summary['lastlogoff']) / 86400)) # length of a day in epoch
 
     line_start = ' <td>' if status != "Pwned" else ' <td class="pwned">'
 
     output_file.write(' <tr>\n')
     output_file.write(line_start + '<a target="_blank" href="' + 'https://steamcommunity.com/profiles/' + str(ids[i]) + '">' + str(ids[i]) + '</a></td>\n')
     output_file.write(line_start + name + '</td>\n')
     output_file.write(line_start + status + '</td>\n')
     output_file.write(line_start + bantype + '</td>\n')
     output_file.write(line_start + bandays + '</td>\n')
     output_file.write(line_start + logdays + '</td>\n')
     output_file.write(line_start + '<a target="_blank" href="' + 'https://steamcommunity.com/profiles/' + str(ids[i]) + '"><img src=' + summary['avatarmedium'] + ">" + '</img></td>\n')
     output_file.write(' </tr>\n')
 
     i += 1
 
 output_file.write('\
 </table>\n\
 ' + str(numbanned) + '/' + str(len(ids)) + ' banned\n\
 </body>\n\
 \
 </html>\n')
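
The ID arithmetic above follows the SteamID2 -> SteamID64 mapping from the linked Valve wiki page. A minimal standalone sketch, assuming input lines shaped like "STEAM_0:1:12345678"; the helper name is illustrative, not part of the commit:

def steamid2_to_steamid64(steamid2):
    parts = steamid2.split(':')
    Y = int(parts[1])           # parity bit of the account ID
    Z = int(parts[2])           # account number
    V = 0x0110000100000000      # individual-account constant, as in the script
    return Z * 2 + V + Y

print(steamid2_to_steamid64("STEAM_0:1:12345678"))  # -> 76561197984957085
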
@@ -1,58 +1,59 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 
 import requests
 import bs4
 import sys
 
 if len(sys.argv) < 4:
     print('Usage: ' + sys.argv[0] + ' [login] [password] [page name]')
     exit(1)
 
 login = sys.argv[1]
 password = sys.argv[2]
 page_name = sys.argv[3]
 
 def download_sheet(s, url):
     page = s.get(url)
 
     try:
         page.raise_for_status()
     except:
         print("Couldn't get %s" % url)
         return
 
     soup = bs4.BeautifulSoup(page.text, 'html.parser')
 
     links = soup.select('a')
     for link in links:
         if '.pdf' in link.text:
             with open(link.text[1:link.text.find('.pdf') + 4], 'wb') as f:
                 file = s.get(link.attrs['href'])
 
                 try:
                     page.raise_for_status()
                 except:
                     print("Couldn't get %s" % link.text)
                     return
 
                 for chunk in file.iter_content(100000):
                     f.write(chunk)
 
 with requests.session() as s:
     login = s.post('https://hi10anime.com/wp-login.php', { 'login':login, 'password':password })
     login.raise_for_status()
 
     if not 'You have successfully logged in. Welcome back!' in login.text:
         print("Couldn't log in")
         exit(1)
 
     page = s.get('https://sheet.host/user/%s/sheets' % page_name)
     page.raise_for_status()
 
     soup = bs4.BeautifulSoup(page.text, 'html.parser')
 
     titles = soup.select('.score-title')
 
     for title in titles:
         print('Getting %s' % title.text)
         download_sheet(s, title.attrs['href'])
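
One apparent bug worth flagging: inside download_sheet, the second status check calls page.raise_for_status() on the listing page again instead of checking the PDF response that was just fetched into file. A sketch of the presumably intended check:

file = s.get(link.attrs['href'])
try:
    file.raise_for_status()   # validate the download itself
except requests.HTTPError:
    print("Couldn't get %s" % link.text)
    return
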
@@ -1,66 +1,67 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 
 import requests
 import bs4
 
 def download_file(url):
     filename = url[url.rfind('/') + 1:]
 
     print('Downloading %s' % filename)
 
     file = requests.get(url)
 
     try:
         file.raise_for_status()
     except:
         open(filename + '.failed', 'w')
 
     with open(filename, 'wb') as f:
         for chunk in file.iter_content(100000):
             f.write(chunk)
 
     f.close()
 
 def get_file_name(url):
     page = requests.get(url)
     page.raise_for_status()
 
     soup = bs4.BeautifulSoup(page.text, "html.parser")
 
     cells = soup.select('td.even') # file names are buried in td.even cells
     for cell in cells:
         text = cell.getText()
 
         if '.rar' in text or '.zip' in text or '.7z' in text:
             return text
 
 
 def scrape_site(url):
     # split the url to use later for constructing new urls
     base_url = url[:url.rfind('/') + 1]
     url = url[url.rfind('/') + 1:]
 
     while True:
         print('Getting %s' % url)
 
         page = requests.get(base_url + url)
         page.raise_for_status() # throw on fail
 
         soup = bs4.BeautifulSoup(page.text, "html.parser")
 
         titles = soup.select('a[title]')
         for title in titles:
             link = title.attrs['href']
 
             if 'id' in link and not 'dl' in link: # find content links
                 print('Found %s' % title.attrs['title'])
                 download_file(base_url + 'sub/enganime/' + get_file_name(base_url + link))
 
         next_link = soup.select('span.pagenav_next > a')
         if len(next_link) == 0:
             print('End of site')
             break
 
         url = next_link[0].attrs['href']
 
 scrape_site('http://subs.com.ru/list.php?c=enganime&p=5&w=asc&d=1')
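
In download_file, the trailing f.close() is redundant (the with-block already closes the file) and only works because f leaks out of the with scope; a failed request also still falls through and writes whatever body came back. A minimal sketch of a tightened version, assuming the ".failed" marker-file behaviour should be kept:

import requests

def download_file(url):
    filename = url[url.rfind('/') + 1:]
    print('Downloading %s' % filename)

    file = requests.get(url)
    try:
        file.raise_for_status()
    except requests.HTTPError:
        open(filename + '.failed', 'w').close()  # leave a failure marker
        return                                   # and skip the write

    with open(filename, 'wb') as f:
        for chunk in file.iter_content(100000):
            f.write(chunk)
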