Fix line-endings; Increase portability; Add speedtyper.py
@@ -1,47 +1,50 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
 import requests
 import json
 import sys
 
 if len(sys.argv) < 3:
     print("usage: python get_7k_pp.py [api key] [username]")
     exit(0)
 
 api_key = sys.argv[1]
 usernames = sys.argv[2:]
 
 def get_json(url):
     page = requests.get(url)
 
     try:
         page.raise_for_status()
     except:
         print("Could not get '{}'".format(url))
         return []
 
     return json.loads(page.text)
 
 def get_user_best(api_key, username):
     return get_json("https://osu.ppy.sh/api/get_user_best?k={}&limit=100&m=3&u={}".format(api_key, username))
 
 def get_beatmap(api_key, id):
     return get_json("https://osu.ppy.sh/api/get_beatmaps?k={}&b={}".format(api_key, id))
 
 for username in usernames:
     scores_7k = []
     pp_7k = 0.0
     for score in get_user_best(api_key, username):
         info = get_beatmap(api_key, score["beatmap_id"])[0]
         if info["diff_size"] == '7':
             # theres probably a prettier solution for this
             percentage = 100.0
             for num in range(len(scores_7k)):
                 percentage *= 0.95
 
             pp_7k += float(score["pp"]) / 100.0 * percentage
             scores_7k.append("[{:.2f}+ ({:.2f}%)] {:.2f} {} [{}] {}k".format(pp_7k, percentage, float(score["pp"]), info["title"], info["version"], score["score"][:3]))
 
     print("7k pp for '{}'".format(username))
     print("Total = {}".format(pp_7k))
 
     for score_7k in scores_7k:
         print(score_7k)
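
The inner loop above decays percentage by 0.95 once per already-collected score, which collapses to a closed form (the script's own comment asks for a prettier solution). A minimal sketch of the same weighting, not part of the commit; the helper name is illustrative:

def weighted_pp(pps):
    # n-th best score (0-indexed) contributes pp * 0.95**n,
    # exactly what the loop computes incrementally
    return sum(pp * 0.95 ** n for n, pp in enumerate(pps))

print(weighted_pp([100.0, 90.0, 80.0]))  # 100.0 + 85.5 + 72.2 = 257.7
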
@@ -1,34 +1,37 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
 import sys
 
 from selenium import webdriver
 
 def login(driver, username, password):
     driver.get("https://osu.ppy.sh/forum/ucp.php?mode=login")
     driver.find_element_by_name("username").send_keys(username)
     driver.find_element_by_name("password").send_keys(password)
     driver.find_element_by_name("login").click()
 
 def main(argv):
     if len(argv) < 5:
         print("Usage: {} [USERNAME] [PASSWORD] [SEARCH PARAMS] [NUM PAGES]".format(argv[0]))
         return
 
     username = argv[1]
     password = argv[2]
 
     search_params = argv[3].strip('"')
     num_pages = int(argv[4])
 
     with webdriver.Firefox() as driver:
         login(driver, username, password)
 
         for page_nr in range(1, num_pages + 1):
             if page_nr == 0:
                 continue
 
             driver.get("https://old.ppy.sh/p/beatmaplist?{}&page={}".format(search_params, page_nr))
             for beatmap_elem in driver.find_elements_by_class_name("beatmap"):
                 print(beatmap_elem.get_property("id"))
 
 if __name__ == "__main__":
     main(sys.argv)
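
Note that the find_element_by_name / find_elements_by_class_name helpers used here belong to Selenium 3 and were removed in Selenium 4. A sketch of the equivalent login with the By locator API, in case the script is run against a newer Selenium:

from selenium.webdriver.common.by import By

def login(driver, username, password):
    driver.get("https://osu.ppy.sh/forum/ucp.php?mode=login")
    driver.find_element(By.NAME, "username").send_keys(username)
    driver.find_element(By.NAME, "password").send_keys(password)
    driver.find_element(By.NAME, "login").click()
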
@@ -1,23 +1,24 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 
 import requests # http requests
 import bs4 # html parser
 
 with open("titles.txt", "w", encoding="UTF-8") as file:
     for index in range(1, 175):
         url = "https://www.gogdb.org/products?page=" + str(index)
         print(url)
 
         page = requests.get("https://www.gogdb.org/products?page=" + str(index))
         page.raise_for_status()
 
         soup = bs4.BeautifulSoup(page.text, "html.parser")
 
         producttable = soup.select("#product-table")[0]
         titles = producttable.select("tr")
         for title in titles:
             if len(title.select(".col-type")) == 0:
                 continue
 
             if title.select(".col-type")[0].text == 'Game':
                 file.write(title.select(".col-name")[0].text.strip() + '\n')
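
The loop builds url and then rebuilds the same string for the actual request. A small sketch, outside the commit, that reuses the variable and a shared requests.Session so the 174 page fetches can reuse one connection:

import requests

session = requests.Session()
for index in range(1, 175):
    url = "https://www.gogdb.org/products?page=" + str(index)
    page = session.get(url)   # reuse the string that was just built and printed
    page.raise_for_status()
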
@@ -1,3 +1,6 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
 import requests, bs4, time
 
 def get_titles(filename, title_type, maxrank):
@@ -1,137 +1,138 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 
 # reads SteamIDs from ./accounts.txt and outputs ban information into ./output.html
 
 import urllib.request
 import json
 import time
 
 steamapikey = ""
 
 # read file and remove trailing newline because we're making a list
 account_lines = [line.rstrip("\n") for line in open("accounts.txt").readlines()]
 
 ids = []
 for line in account_lines:
     # https://developer.valvesoftware.com/wiki/SteamID
     Z = int(line.split(':')[2])
     V = 0x0110000100000000 # profile ID constant
     Y = int(line.split(':')[1])
     W = Z * 2 + V + Y
     ids.append(str(W))
 
 # API takes in comma separated steamids
 ids_string = ",".join([x for x in ids])
 
 # https://developer.valvesoftware.com/wiki/Steam_Web_API
 summaries = json.load(urllib.request.urlopen("http://api.steampowered.com/ISteamUser/GetPlayerSummaries/v0002/?key=" + steamapikey + "&steamids=" + ids_string))
 bans = json.load(urllib.request.urlopen("http://api.steampowered.com/ISteamUser/GetPlayerBans/v1/?key=" + steamapikey + "&steamids=" + ids_string))
 
 output_file = open("output.html", "w", encoding="utf-8")
 
 output_file.write('\
 <!DOCTYPE html>\n\
 <html>\n\
 <head>\n\
 <style>\n\
 body {\n\
 font-family: sans-serif;\n\
 }\n\
 \n\
 table {\n\
 color: #222;\n\
 border-collapse: collapse;\n\
 }\n\
 \n\
 tr, th, td {\n\
 border: 1px solid #a2a9b1;\n\
 padding: 0.2em 0.4em;\n\
 }\n\
 \n\
 .pwned {\n\
 background-color: #ffb6c1\n\
 }\n\
 \n\
 th {\n\
 background-color: #eaecf0;\n\
 text-align: center;\n\
 }\n\
 \n\
 a:hover, a:visited, a:link, a:active {\n\
 text-decoration: none;\n\
 }\n\
 </style>\n\
 </head>\n\
 \n\
 <body>\n\
 <table>\n\
 <tr>\n\
 <th>ID</th>\n\
 <th>Name</th>\n\
 <th>Status</th>\n\
 <th>Type</th>\n\
 <th>BanDays</th>\n\
 <th>LogDays</th>\n\
 <th>Profile</th>\n\
 </tr>\n\
 ')
 
 numbanned = 0
 
 for i in range(len(ids)):
     try:
         for summary in summaries['response']['players']:
             if summary['steamid'] == str(ids[i]):
                 break
     except:
         continue
 
     try:
         for ban in bans['players']:
             if ban['SteamId'] == str(ids[i]):
                 break
     except:
         continue
 
     status = ""
     bantype = ""
     bandays = ""
 
     if ban['VACBanned']:
         status = "Pwned"
         bantype = "VAC"
         bandays = str(ban['DaysSinceLastBan'])
         numbanned += 1
 
     if ban['NumberOfGameBans'] > 0:
         status = "Pwned"
         bantype = "Gameban"
         bandays = str(ban['DaysSinceLastBan'])
         numbanned += 1
 
     name = summary['personaname']
     name = name.replace("<", "&lt;") # escape html tag names
     name = name.replace(">", "&gt;")
 
     logdays = str(int((time.time() - summary['lastlogoff']) / 86400)) # length of a day in epoch
 
     line_start = ' <td>' if status != "Pwned" else ' <td class="pwned">'
 
     output_file.write(' <tr>\n')
     output_file.write(line_start + '<a target="_blank" href="' + 'https://steamcommunity.com/profiles/' + str(ids[i]) + '">' + str(ids[i]) + '</a></td>\n')
     output_file.write(line_start + name + '</td>\n')
     output_file.write(line_start + status + '</td>\n')
     output_file.write(line_start + bantype + '</td>\n')
     output_file.write(line_start + bandays + '</td>\n')
     output_file.write(line_start + logdays + '</td>\n')
     output_file.write(line_start + '<a target="_blank" href="' + 'https://steamcommunity.com/profiles/' + str(ids[i]) + '"><img src=' + summary['avatarmedium'] + ">" + '</img></td>\n')
     output_file.write(' </tr>\n')
 
     i += 1
 
 output_file.write('\
 </table>\n\
 ' + str(numbanned) + '/' + str(len(ids)) + ' banned\n\
 </body>\n\
 \
 </html>\n')
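
The ID arithmetic above follows the SteamID2 -> SteamID64 mapping from the linked Valve wiki page. A minimal standalone sketch, assuming input lines shaped like "STEAM_0:1:12345678"; the helper name is illustrative, not part of the commit:

def steamid2_to_steamid64(steamid2):
    parts = steamid2.split(':')
    Y = int(parts[1])           # parity bit of the account ID
    Z = int(parts[2])           # account number
    V = 0x0110000100000000      # individual-account constant, as in the script
    return Z * 2 + V + Y

print(steamid2_to_steamid64("STEAM_0:1:12345678"))  # -> 76561197984957085
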
@@ -1,58 +1,59 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 
 import requests
 import bs4
 import sys
 
 if len(sys.argv) < 4:
     print('Usage: ' + sys.argv[0] + ' [login] [password] [page name]')
     exit(1)
 
 login = sys.argv[1]
 password = sys.argv[2]
 page_name = sys.argv[3]
 
 def download_sheet(s, url):
     page = s.get(url)
 
     try:
         page.raise_for_status()
     except:
         print("Couldn't get %s" % url)
         return
 
     soup = bs4.BeautifulSoup(page.text, 'html.parser')
 
     links = soup.select('a')
     for link in links:
         if '.pdf' in link.text:
             with open(link.text[1:link.text.find('.pdf') + 4], 'wb') as f:
                 file = s.get(link.attrs['href'])
 
                 try:
                     page.raise_for_status()
                 except:
                     print("Couldn't get %s" % link.text)
                     return
 
                 for chunk in file.iter_content(100000):
                     f.write(chunk)
 
 with requests.session() as s:
     login = s.post('https://hi10anime.com/wp-login.php', { 'login':login, 'password':password })
     login.raise_for_status()
 
     if not 'You have successfully logged in. Welcome back!' in login.text:
         print("Couldn't log in")
         exit(1)
 
     page = s.get('https://sheet.host/user/%s/sheets' % page_name)
     page.raise_for_status()
 
     soup = bs4.BeautifulSoup(page.text, 'html.parser')
 
     titles = soup.select('.score-title')
 
     for title in titles:
         print('Getting %s' % title.text)
         download_sheet(s, title.attrs['href'])
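
One apparent bug worth flagging: inside download_sheet, the second status check calls page.raise_for_status() on the listing page again instead of checking the PDF response that was just fetched into file. A sketch of the presumably intended check:

file = s.get(link.attrs['href'])
try:
    file.raise_for_status()   # validate the download itself
except requests.HTTPError:
    print("Couldn't get %s" % link.text)
    return
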
@@ -1,66 +1,67 @@
 #!/usr/bin/env python3
+# -*- coding: utf-8 -*-
 
 import requests
 import bs4
 
 def download_file(url):
     filename = url[url.rfind('/') + 1:]
 
     print('Downloading %s' % filename)
 
     file = requests.get(url)
 
     try:
         file.raise_for_status()
     except:
         open(filename + '.failed', 'w')
 
     with open(filename, 'wb') as f:
         for chunk in file.iter_content(100000):
             f.write(chunk)
 
     f.close()
 
 def get_file_name(url):
     page = requests.get(url)
     page.raise_for_status()
 
     soup = bs4.BeautifulSoup(page.text, "html.parser")
 
     cells = soup.select('td.even') # file names are buried in td.even cells
     for cell in cells:
         text = cell.getText()
 
         if '.rar' in text or '.zip' in text or '.7z' in text:
             return text
 
 
 def scrape_site(url):
     # split the url to use later for constructing new urls
     base_url = url[:url.rfind('/') + 1]
     url = url[url.rfind('/') + 1:]
 
     while True:
         print('Getting %s' % url)
 
         page = requests.get(base_url + url)
         page.raise_for_status() # throw on fail
 
         soup = bs4.BeautifulSoup(page.text, "html.parser")
 
         titles = soup.select('a[title]')
         for title in titles:
             link = title.attrs['href']
 
             if 'id' in link and not 'dl' in link: # find content links
                 print('Found %s' % title.attrs['title'])
                 download_file(base_url + 'sub/enganime/' + get_file_name(base_url + link))
 
         next_link = soup.select('span.pagenav_next > a')
         if len(next_link) == 0:
             print('End of site')
             break
 
         url = next_link[0].attrs['href']
 
 scrape_site('http://subs.com.ru/list.php?c=enganime&p=5&w=asc&d=1')
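
In download_file, the trailing f.close() is redundant (the with-block already closes the file) and only works because f leaks out of the with scope; a failed request also still falls through and writes whatever body came back. A minimal sketch of a tightened version, assuming the ".failed" marker-file behaviour should be kept:

import requests

def download_file(url):
    filename = url[url.rfind('/') + 1:]
    print('Downloading %s' % filename)

    file = requests.get(url)
    try:
        file.raise_for_status()
    except requests.HTTPError:
        open(filename + '.failed', 'w').close()  # leave a failure marker
        return                                   # and skip the write

    with open(filename, 'wb') as f:
        for chunk in file.iter_content(100000):
            f.write(chunk)
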