Fix line-endings; Increase portability; Add speedtyper.py
@@ -1,66 +1,67 @@
#!/usr/bin/env python3

import requests
import bs4


def download_file(url):
    # everything after the last '/' becomes the local file name
    filename = url[url.rfind('/') + 1:]

    print('Downloading %s' % filename)

    file = requests.get(url)

    try:
        file.raise_for_status()
    except requests.exceptions.HTTPError:
        # leave an empty marker so failed downloads are easy to spot, then give up
        open(filename + '.failed', 'w').close()
        return

    with open(filename, 'wb') as f:
        for chunk in file.iter_content(100000):
            f.write(chunk)


def get_file_name(url):
    page = requests.get(url)
    page.raise_for_status()

    soup = bs4.BeautifulSoup(page.text, "html.parser")

    cells = soup.select('td.even')  # archive names sit in the table's 'even' cells
    for cell in cells:
        text = cell.getText()

        if '.rar' in text or '.zip' in text or '.7z' in text:
            return text


def scrape_site(url):
    # split the url to use later for constructing new urls
    base_url = url[:url.rfind('/') + 1]
    url = url[url.rfind('/') + 1:]

    while True:
        print('Getting %s' % url)

        page = requests.get(base_url + url)
        page.raise_for_status()  # throw on fail

        soup = bs4.BeautifulSoup(page.text, "html.parser")

        titles = soup.select('a[title]')
        for title in titles:
            link = title.attrs['href']

            if 'id' in link and 'dl' not in link:  # find content links
                print('Found %s' % title.attrs['title'])
                filename = get_file_name(base_url + link)
                if filename:  # skip entries whose detail page lists no archive
                    download_file(base_url + 'sub/enganime/' + filename)

        next_link = soup.select('span.pagenav_next > a')
        if len(next_link) == 0:
            print('End of site')
            break

        url = next_link[0].attrs['href']


scrape_site('http://subs.com.ru/list.php?c=enganime&p=5&w=asc&d=1')

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import requests
import bs4


def download_file(url):
    # everything after the last '/' becomes the local file name
    filename = url[url.rfind('/') + 1:]

    print('Downloading %s' % filename)

    file = requests.get(url)

    try:
        file.raise_for_status()
    except requests.exceptions.HTTPError:
        # leave an empty marker so failed downloads are easy to spot, then give up
        open(filename + '.failed', 'w').close()
        return

    with open(filename, 'wb') as f:
        for chunk in file.iter_content(100000):
            f.write(chunk)


def get_file_name(url):
    page = requests.get(url)
    page.raise_for_status()

    soup = bs4.BeautifulSoup(page.text, "html.parser")

    cells = soup.select('td.even')  # archive names sit in the table's 'even' cells
    for cell in cells:
        text = cell.getText()

        if '.rar' in text or '.zip' in text or '.7z' in text:
            return text


def scrape_site(url):
    # split the url to use later for constructing new urls
    base_url = url[:url.rfind('/') + 1]
    url = url[url.rfind('/') + 1:]

    while True:
        print('Getting %s' % url)

        page = requests.get(base_url + url)
        page.raise_for_status()  # throw on fail

        soup = bs4.BeautifulSoup(page.text, "html.parser")

        titles = soup.select('a[title]')
        for title in titles:
            link = title.attrs['href']

            if 'id' in link and 'dl' not in link:  # find content links
                print('Found %s' % title.attrs['title'])
                filename = get_file_name(base_url + link)
                if filename:  # skip entries whose detail page lists no archive
                    download_file(base_url + 'sub/enganime/' + filename)

        next_link = soup.select('span.pagenav_next > a')
        if len(next_link) == 0:
            print('End of site')
            break

        url = next_link[0].attrs['href']


scrape_site('http://subs.com.ru/list.php?c=enganime&p=5&w=asc&d=1')
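
The script starts the scrape unconditionally at import time. A minimal sketch of how the entry point could instead be guarded and parameterized is below; the main-guard wrapper and the --url flag are illustrative assumptions, not part of this commit.

import argparse

if __name__ == '__main__':
    # hypothetical CLI wrapper around scrape_site(); defaults to the listing page used above
    parser = argparse.ArgumentParser(description='Download subtitle archives from a listing page')
    parser.add_argument('--url', default='http://subs.com.ru/list.php?c=enganime&p=5&w=asc&d=1')
    args = parser.parse_args()
    scrape_site(args.url)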