################################################################
#                                                              #
#                     THE PERFECT BLENDER                      #
#                                                              #
#    Version 1.0                                               #
#    Author: kaolincash                                        #
#    Vice Author: neuralwarp                                   #
#                                                              #
#    Date: 23rd July 2018                                      #
#    Email: kaolin@maia.cash                                   #
#                                                              #
################################################################
#
# Searches Dailymotion for the next "Neighbours" episodes (numbered
# after the highest-numbered file already in DIR), extracts the
# direct H264-1280 file URL from each video page with headless
# Chrome, and downloads every hit to DIR as "<episode>.mp4".

####################### Initialisation #########################

import os
import re
import sys
import urllib
import requests
import tempfile
import selenium
from requests import get
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from bs4 import BeautifulSoup
from urllib.request import urlopen
from selenium.webdriver.chrome.options import Options

options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('log-level=3')
# NOTE(review): newer Selenium releases expect `options=` instead of
# `chrome_options=` -- confirm against the installed Selenium version.
driver = webdriver.Chrome(chrome_options=options)

Vlist = []      # Video page urls returned by the current search
Flist = []      # Direct file urls queued for download
Dlist = []      # Episode numbers matching Flist, index for index
DM = 'https://dailymotion.com'                      # Source domain
DMV = 'H264-1280[^"]*'                              # Video url generator
DLV = DM + '/cdn/'
DIR = '//Olympus/Mnemosyne/Calliope/Neighbours'     # Destination folder
PATH = str(re.sub(r'[\\]', '/', os.path.dirname(__file__)))  # Relative path of this file
H264 = 'H264-1280'
MAXTRY = 12     # Search results examined before asking the user to retry
vidget = 0      # Did we find a video?
it = 0          # Iteration number (index into Vlist)
# print('\n' + PATH)                                # Integrity check


def _last_episode_number(directory):
    """Return the highest episode number among the files in *directory*.

    File names are compared as integers (extension stripped), so
    '1000.mp4' ranks above '999.mp4'. Names that are not plain numbers
    are ignored.

    Raises:
        OSError: if *directory* cannot be listed.
        ValueError: if no numerically named file exists.
    """
    numbers = []
    for name in os.listdir(directory):
        try:
            numbers.append(int(os.path.splitext(name)[0]))
        except ValueError:
            continue
    return max(numbers)     # max() of an empty list raises ValueError


def _search_results(episode):
    """Return the video page urls listed by a search for *episode*."""
    searchurl = DM + '/search/' + 'Neighbours%20' + str(episode)
    # Plain request first: urlopen raises if the url is unreachable,
    # before Selenium is asked to render it. Close the response at once.
    urlopen(searchurl).close()
    # Selenium opens the url and passes the dynamically generated code
    # to the HTML parser
    driver.get(searchurl)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    vurls = soup.find_all('a', class_='Video__details___vM5NB', href=True)
    return [DM + str(vurl['href']) for vurl in vurls]


def _direct_file_url(page):
    """Build the direct download url from a video page's source text.

    *page* must contain H264. The result has the form
    "https://dailymotion.com/cdn/H264-1280x720/[filename.mp4]?auth=[authcode]".
    """
    compact = re.sub(r'\s', '', page)
    # Put a space before every H264 marker so split() isolates the text
    # starting at the first marker.
    from_marker = re.sub(H264, ' ' + H264, compact).split()[1]
    # Cut at the first '"}]}' and drop the escaping backslashes.
    fragment = re.sub('"}]}', ' ', from_marker).split()[0]
    return DLV + re.sub(r'\\', '', fragment)


def _download(url, destination):
    """Stream *url* into the file *destination*, drawing a progress bar.

    Raises:
        requests.HTTPError: if the server answers with an error status;
            nothing is written in that case, so an error page is never
            saved as an episode file.
    """
    response = requests.get(url, stream=True)
    try:
        response.raise_for_status()
        total_length = response.headers.get('content-length')
        with open(destination, 'wb') as f:
            if total_length is None:
                # Size unknown: no progress bar possible
                f.write(response.content)
            else:
                received = 0
                total_length = int(total_length)
                for data in response.iter_content(chunk_size=4096):
                    received += len(data)
                    f.write(data)
                    done = int(50 * received / total_length)
                    sys.stdout.write('\r[%s%s]' % ('=' * done, ' ' * (50 - done)))
                    sys.stdout.flush()
    finally:
        response.close()


try:
    ########################### Input #############################

    print('\nHow many episodes do you want to search?')
    do = int(input())

    ####################### Searcher loop #########################

    for pass_number in range(do):   # Search, parse, and queue episodes
        if pass_number == 0:
            try:
                # Get latest episode number in DIR and add 1
                ep = _last_episode_number(DIR) + 1
            except (OSError, ValueError):
                print('Error on pass ' + str(it + 1) + ' of ' + str(do) + ': ' + DIR + ' is probably empty.')
                print('I can\'t find the next episode if I don\'t know the last one\'s number!')
                print('To resolve, create a file here named with the number of the last episode you saw.')
                # Without an episode number there is nothing to search for
                sys.exit(1)

        # Every episode gets its own search and starts at the first result
        Vlist = _search_results(ep)
        it = 0
        vidget = 0

        while vidget == 0:
            if it < min(len(Vlist), MAXTRY):
                vhref = Vlist[it]
                urlopen(vhref).close()
                driver.get(vhref)

                # Parse direct file url
                BS = BeautifulSoup(driver.page_source, 'html.parser')
                if re.search(H264, str(BS)):
                    vdl = _direct_file_url(str(BS))
                    print('Obtained 720p file, adding to download queue.')
                    # print('\n' + str(vdl) + '\n')     # Integrity check
                    vidget = 1
                else:
                    print('720p not found on iteration ' + str(it + 1) + '.')
                    print('Trying next search result.')
                    it += 1

            # If no 720p files found on first search page
            else:
                retry = ''
                while retry not in ('y', 'n'):
                    print('No 720p files found. Retry? y/n')
                    retry = str(input())
                    if retry == 'y':
                        # Search again and restart from the first result
                        Vlist = _search_results(ep)
                        it = 0
                    elif retry == 'n':
                        print('\nProgram terminated.')
                        sys.exit()
                    else:
                        print('I only understand "y" and "n".')

        Flist.append(vdl)
        Dlist.append(ep)
        print('Episode ' + str(ep) + ' added to download queue.')

        ########################## Iterators ##########################

        ep += 1

    ########################## Downloader #########################

    # Download in the order the episodes were queued
    for DL, episode in zip(Flist, Dlist):
        FILE = str(DIR) + '/' + str(episode) + '.mp4'
        print('Downloading %s' % FILE)
        _download(str(DL), FILE)

    ########################### Logging ###########################

    # Enable/disable debug output
    # log = '/' + str(ep) + '_log.txt'
    # output = open(PATH + log, 'w+')
    # output.write(str(Vlist) + '\n')
    # output.close()
    # print('\nIteration #' + str(it))      # Integrity check
    # print('PATH = ' + PATH)               # Integrity check
    # print('log = ' + log)                 # Integrity check

    ########################### Output ############################

    print("\nDone.")
finally:
    # Always shut the headless browser down, including on sys.exit()
    driver.quit()