Fix line-endings; Increase portability; Add speedtyper.py
@@ -1,66 +1,67 @@
#!/usr/bin/env python3

import requests
import bs4


def download_file(url):
    # everything after the last '/' becomes the local file name
    filename = url[url.rfind('/') + 1:]

    print('Downloading %s' % filename)

    file = requests.get(url)

    try:
        file.raise_for_status()
    except requests.exceptions.HTTPError:
        # leave an empty marker so failed downloads are easy to spot, then give up
        open(filename + '.failed', 'w').close()
        return

    with open(filename, 'wb') as f:
        for chunk in file.iter_content(100000):
            f.write(chunk)


def get_file_name(url):
    page = requests.get(url)
    page.raise_for_status()

    soup = bs4.BeautifulSoup(page.text, "html.parser")

    cells = soup.select('td.even')  # archive names sit in the table's 'even' cells
    for cell in cells:
        text = cell.getText()

        if '.rar' in text or '.zip' in text or '.7z' in text:
            return text


def scrape_site(url):
    # split the url to use later for constructing new urls
    base_url = url[:url.rfind('/') + 1]
    url = url[url.rfind('/') + 1:]

    while True:
        print('Getting %s' % url)

        page = requests.get(base_url + url)
        page.raise_for_status()  # throw on fail

        soup = bs4.BeautifulSoup(page.text, "html.parser")

        titles = soup.select('a[title]')
        for title in titles:
            link = title.attrs['href']

            if 'id' in link and 'dl' not in link:  # find content links
                print('Found %s' % title.attrs['title'])
                filename = get_file_name(base_url + link)
                if filename:  # skip entries whose detail page lists no archive
                    download_file(base_url + 'sub/enganime/' + filename)

        next_link = soup.select('span.pagenav_next > a')
        if len(next_link) == 0:
            print('End of site')
            break

        url = next_link[0].attrs['href']


scrape_site('http://subs.com.ru/list.php?c=enganime&p=5&w=asc&d=1')

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import requests
import bs4


def download_file(url):
    # everything after the last '/' becomes the local file name
    filename = url[url.rfind('/') + 1:]

    print('Downloading %s' % filename)

    file = requests.get(url)

    try:
        file.raise_for_status()
    except requests.exceptions.HTTPError:
        # leave an empty marker so failed downloads are easy to spot, then give up
        open(filename + '.failed', 'w').close()
        return

    with open(filename, 'wb') as f:
        for chunk in file.iter_content(100000):
            f.write(chunk)


def get_file_name(url):
    page = requests.get(url)
    page.raise_for_status()

    soup = bs4.BeautifulSoup(page.text, "html.parser")

    cells = soup.select('td.even')  # archive names sit in the table's 'even' cells
    for cell in cells:
        text = cell.getText()

        if '.rar' in text or '.zip' in text or '.7z' in text:
            return text


def scrape_site(url):
    # split the url to use later for constructing new urls
    base_url = url[:url.rfind('/') + 1]
    url = url[url.rfind('/') + 1:]

    while True:
        print('Getting %s' % url)

        page = requests.get(base_url + url)
        page.raise_for_status()  # throw on fail

        soup = bs4.BeautifulSoup(page.text, "html.parser")

        titles = soup.select('a[title]')
        for title in titles:
            link = title.attrs['href']

            if 'id' in link and 'dl' not in link:  # find content links
                print('Found %s' % title.attrs['title'])
                filename = get_file_name(base_url + link)
                if filename:  # skip entries whose detail page lists no archive
                    download_file(base_url + 'sub/enganime/' + filename)

        next_link = soup.select('span.pagenav_next > a')
        if len(next_link) == 0:
            print('End of site')
            break

        url = next_link[0].attrs['href']


scrape_site('http://subs.com.ru/list.php?c=enganime&p=5&w=asc&d=1')
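
The script starts the scrape unconditionally at import time. A minimal sketch of how the entry point could instead be guarded and parameterized is below; the main-guard wrapper and the --url flag are illustrative assumptions, not part of this commit.

import argparse

if __name__ == '__main__':
    # hypothetical CLI wrapper around scrape_site(); defaults to the listing page used above
    parser = argparse.ArgumentParser(description='Download subtitle archives from a listing page')
    parser.add_argument('--url', default='http://subs.com.ru/list.php?c=enganime&p=5&w=asc&d=1')
    args = parser.parse_args()
    scrape_site(args.url)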