정보&강의 파이썬으로 만든 Youtube 다운로드 프로그램
- 운영자
- 2868
- 0
첨부 3
- python_youtube_download.png (File Size: 33.3KB/Download: 14)
- 이미지 3.png (File Size: 799.8KB/Download: 6)
- youtube_download.zip (File Size: 4.79MB/Download: 383)
# -*- coding: utf-8 -*-
"""Interactive command-line YouTube downloader built on pytube.

The script asks for a video URL, lists the streams pytube reports for that
video, asks which one to fetch and saves it into ``./download``.
"""
import os
import subprocess
import sys

import pytube

# Directory (relative to the current working directory) that receives the
# downloaded files.
DOWNLOAD_DIR = "./download"


def main():
    """Prompt for a URL and a stream index, then download that stream.

    Handled errors are reported on stdout; anything unexpected is reported
    and re-raised.  The completion message is printed only when the download
    actually succeeded.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    url = input("다운받을 유튜브 링크를 입력하세요. : ")
    yt = pytube.YouTube(url)
    # NOTE(review): ``streams.all()`` is kept exactly as the original script
    # called it; confirm it still exists before upgrading pytube.
    vids = yt.streams.all()

    # Show the available stream formats together with their index.
    for i, vid in enumerate(vids):
        print(i, '. ', vid)

    try:
        # The conversion lives inside the try block so that a non-numeric
        # answer reaches the ValueError handler below.
        vnum = int(input("다운 받을 화질은? "))
        # Reject negative numbers explicitly: Python would otherwise accept
        # them silently as "count from the end of the list".
        if not 0 <= vnum < len(vids):
            raise IndexError(
                "choose a number between 0 and {0}".format(len(vids) - 1)
            )
        vids[vnum].download(DOWNLOAD_DIR)  # perform the download
    except OSError as err:
        print("OS error: {0}".format(err))
    except ValueError:
        print("Could not convert data to an integer.")
    except IndexError as err:
        print("Invalid selection: {0}".format(err))
    except Exception:
        print("Unexpected error:", sys.exc_info()[0])
        raise
    else:
        print('동영상 다운로드 완료!')


if __name__ == "__main__":
    main()
유튜브 다운로드 프로그램 소스입니다.
실행파일은 첨부파일로 다운로드 받으시면 됩니다.
403 오류가 발생하면, 설치된 pytube 패키지의 cipher.py와 mixins.py 파일을 아래 내용으로 순서대로 수정하시면 됩니다. (첫 번째 코드가 cipher.py, 두 번째 코드가 mixins.py입니다.)
# -*- coding: utf-8 -*-
"""
This module contains all logic necessary to decipher the signature.

YouTube's strategy to restrict downloading videos is to send a ciphered
version of the signature to the client, along with the decryption algorithm
obfuscated in JavaScript. For the clients to play the videos, JavaScript must
take the ciphered version, cycle it through a series of "transform functions,"
and then signs the media URL with the output.

This module is responsible for (1) finding and extracting those "transform
functions" (2) maps them to Python equivalents and (3) taking the ciphered
signature and decoding it.

"""
from __future__ import absolute_import

import logging
import pprint
import re
from itertools import chain

from pytube.exceptions import RegexMatchError
from pytube.helpers import regex_search


logger = logging.getLogger(__name__)


def get_initial_function_name(js):
    """Extract the name of the function responsible for computing the signature.

    :param str js:
        The contents of the base.js asset file.
    :returns:
        Whatever ``regex_search`` yields for ``group=1``.  Every candidate
        pattern below is written so that its first capturing group is the
        function name (the named group ``sig``).

    """
    # c&&d.set("signature", EE(c));
    # NOTE(review): a list of candidate patterns is handed to
    # ``regex_search``; this presumes the helper accepts a list -- confirm
    # against the installed pytube.helpers.
    pattern = [
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        # Written without a capturing group around the quote character so
        # that group 1 is the function name, like in every other pattern.
        r'(?:"signature"|\'signature\')\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        # This pattern was cut off in the pasted source ("(?P<si$"), which
        # made it an invalid regular expression; the tail is restored here.
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    ]
    logger.debug('finding initial function name')
    return regex_search(pattern, js, group=1)


def get_transform_plan(js):
    """Extract the "transform plan".

    The "transform plan" is the functions that the ciphered signature is
    cycled through to obtain the actual signature.

    :param str js:
        The contents of the base.js asset file.
    :returns:
        The matched function body split on ``;``, one call per element.

    **Example**:

    >>> get_transform_plan(js)
    ['DE.AJ(a,15)',
    'DE.VR(a,3)',
    'DE.AJ(a,51)',
    'DE.VR(a,3)',
    'DE.kT(a,51)',
    'DE.kT(a,8)',
    'DE.VR(a,3)',
    'DE.kT(a,21)']
    """
    # The name is escaped because obfuscated identifiers may contain "$".
    name = re.escape(get_initial_function_name(js))
    pattern = r'%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}' % name
    logger.debug('getting transform plan')
    return regex_search(pattern, js, group=1).split(';')


def get_transform_object(js, var):
    """Extract the "transform object".

    The "transform object" contains the function definitions referenced in the
    "transform plan". The ``var`` argument is the obfuscated variable name
    which contains these functions, for example, given the function call
    ``DE.AJ(a,15)`` returned by the transform plan, "DE" would be the var.

    :param str js:
        The contents of the base.js asset file.
    :param str var:
        The obfuscated variable name that stores an object with all functions
        that descrambles the signature.

    **Example**:

    >>> get_transform_object(js, 'DE')
    ['AJ:function(a){a.reverse()}',
    'VR:function(a,b){a.splice(0,b)}',
    'kT:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}']

    """
    pattern = r'var %s={(.*?)};' % re.escape(var)
    logger.debug('getting transform object')
    return (
        regex_search(pattern, js, group=1, flags=re.DOTALL)
        .replace('\n', ' ')
        .split(', ')
    )


def get_transform_map(js, var):
    """Build a transform function lookup.

    Build a lookup table of obfuscated JavaScript function names to the
    Python equivalents.

    :param str js:
        The contents of the base.js asset file.
    :param str var:
        The obfuscated variable name that stores an object with all functions
        that descrambles the signature.
    :rtype: dict
    :returns:
        Mapping of JavaScript member name to the matching Python function.

    """
    transform_object = get_transform_object(js, var)
    mapper = {}
    for obj in transform_object:
        # AJ:function(a){a.reverse()} => AJ, function(a){a.reverse()}
        name, function = obj.split(':', 1)
        fn = map_functions(function)
        mapper[name] = fn
    return mapper


def reverse(arr, b):
    """Reverse elements in a list.

    This function is equivalent to:

    .. code-block:: javascript

        function(a, b) { a.reverse() }

    This method takes an unused ``b`` variable as their transform functions
    universally sent two arguments.

    **Example**:

    >>> reverse([1, 2, 3, 4], None)
    [4, 3, 2, 1]
    """
    return arr[::-1]


def splice(arr, b):
    """Drop the first ``b`` items of a list.

    This function is equivalent to:

    .. code-block:: javascript

        function(a, b) { a.splice(0, b) }

    In JavaScript ``a.splice(0, b)`` removes the first ``b`` elements from
    ``a`` in place, and the descrambler keeps working on what is left in
    ``a``.  The Python equivalent is therefore the remainder ``arr[b:]``.

    **Example**:

    >>> splice([1, 2, 3, 4], 2)
    [3, 4]
    """
    return arr[b:]


def swap(arr, b):
    """Swap positions at b modulus the list length.

    This function is equivalent to:

    .. code-block:: javascript

        function(a, b) { var c=a[0];a[0]=a[b%a.length];a[b]=c }

    A new list is returned; ``arr`` itself is not modified.

    **Example**:

    >>> swap([1, 2, 3, 4], 2)
    [3, 2, 1, 4]
    """
    if not arr:
        # Nothing to swap; also avoids a modulo by zero.
        return list(arr)
    r = b % len(arr)
    if r == 0:
        # Swapping element 0 with itself is a no-op.  The chained slices
        # below would otherwise emit element 0 twice and grow the list.
        return list(arr)
    return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1:]))


def map_functions(js_func):
    """For a given JavaScript transform function, return the Python equivalent.

    :param str js_func:
        The JavaScript version of the transform function.
    :raises RegexMatchError:
        When none of the known function shapes matches ``js_func``.

    """
    # Raw strings keep the regex escapes (\w, \[, \( ...) from being read as
    # invalid Python string escapes; the patterns themselves are unchanged.
    mapper = (
        # function(a){a.reverse()}
        (r'{\w\.reverse\(\)}', reverse),
        # function(a,b){a.splice(0,b)}
        (r'{\w\.splice\(0,\w\)}', splice),
        # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}
        (r'{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\]=\w}', swap),
        # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c}
        (
            r'{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];'
            r'\w\[\w\%\w.length\]=\w}',
            swap,
        ),
    )

    for pattern, fn in mapper:
        if re.search(pattern, js_func):
            return fn
    raise RegexMatchError(
        'could not find python equivalent function for: ',
        js_func,
    )


def parse_function(js_func):
    """Parse the Javascript transform function.

    Break a JavaScript transform function down into a two element ``tuple``
    containing the function name and some integer-based argument.

    :param str js_func:
        The JavaScript version of the transform function.
    :rtype: tuple
    :returns:
        two element tuple containing the function name and an argument.

    **Example**:

    >>> parse_function('DE.AJ(a,15)')
    ('AJ', 15)

    """
    logger.debug('parsing transform function')
    return regex_search(r'\w+\.(\w+)\(\w,(\d+)\)', js_func, groups=True)


def get_signature(js, ciphered_signature):
    """Decipher the signature.

    Taking the ciphered signature, applies the transform functions.

    :param str js:
        The contents of the base.js asset file.
    :param str ciphered_signature:
        The ciphered signature sent in the ``player_config``.
    :rtype: str
    :returns:
       Decrypted signature required to download the media content.

    """
    tplan = get_transform_plan(js)
    # DE.AJ(a,15) => DE
    # Only the text before the first "." is needed.
    var = tplan[0].split('.', 1)[0]
    tmap = get_transform_map(js, var)
    # The transforms operate on a list of single characters.
    signature = list(ciphered_signature)

    for js_func in tplan:
        name, argument = parse_function(js_func)
        signature = tmap[name](signature, int(argument))
        logger.debug(
            'applied transform function\n%s', pprint.pformat(
                {
                    'output': ''.join(signature),
                    'js_function': name,
                    'argument': int(argument),
                    'function': tmap[name],
                }, indent=2,
            ),
        )
    return ''.join(signature)
# -*- coding: utf-8 -*-
"""Applies in-place data mutations."""
from __future__ import absolute_import

import json
import logging
import pprint

from pytube import cipher
from pytube.compat import parse_qsl
from pytube.compat import unquote
from pytube.exceptions import LiveStreamError


logger = logging.getLogger(__name__)


def apply_signature(config_args, fmt, js):
    """Apply the decrypted signature to the stream manifest.

    Each stream whose URL is not already signed gets ``&sig=<signature>``
    appended to its ``url`` entry, in place.

    :param dict config_args:
        Details of the media streams available.
    :param str fmt:
        Key in stream manifests (``ytplayer_config``) containing progressive
        download or adaptive streams (e.g.: ``url_encoded_fmt_stream_map`` or
        ``adaptive_fmts``).
    :param str js:
        The contents of the base.js asset file.
    :raises LiveStreamError:
        When a stream has no ``url`` and the player response reports
        ``liveStreamability``.
    :raises TypeError:
        When a stream needs descrambling but ``js`` is ``None``.

    """
    stream_manifest = config_args[fmt]
    live_stream = json.loads(config_args['player_response']).get(
        'playabilityStatus', {},
    ).get('liveStreamability')
    for i, stream in enumerate(stream_manifest):
        if 'url' in stream:
            url = stream['url']
        elif live_stream:
            raise LiveStreamError('Video is currently being streamed live')
        else:
            # Without this branch ``url`` would be unbound on the first
            # stream, or silently carry over the previous stream's URL.
            logger.debug('stream has no url, nothing to sign; skipping')
            continue

        if ('signature=' in url or
                ('s' not in stream and
                 ('&sig=' in url or '&lsig=' in url))):
            # For certain videos, YouTube will just provide them pre-signed, in
            # which case there's no real magic to download them and we can skip
            # the whole signature descrambling entirely.
            logger.debug('signature found, skip decipher')
            continue

        if js is not None:
            signature = cipher.get_signature(js, stream['s'])
        else:
            # The url carries no signature (see the check above), so the
            # player JavaScript is needed to descramble one.
            # TypeError caught in __main__
            raise TypeError('JS is None')

        logger.debug(
            'finished descrambling signature for itag=%s\n%s',
            stream['itag'], pprint.pformat(
                {
                    's': stream['s'],
                    'signature': signature,
                }, indent=2,
            ),
        )
        stream_manifest[i]['url'] = url + '&sig=' + signature


def apply_descrambler(stream_data, key):
    """Apply various in-place transforms to YouTube's media stream data.

    Creates a ``list`` of dictionaries by string splitting on commas, then
    taking each list item, parsing it as a query string, converting it to a
    ``dict`` and unquoting the value.

    :param dict stream_data:
        Dictionary containing query string encoded values.
    :param str key:
        Name of the key in dictionary.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    """
    stream_data[key] = [
        {k: unquote(v) for k, v in parse_qsl(i)}
        for i in stream_data[key].split(',')
    ]
    logger.debug(
        'applying descrambler\n%s',
        pprint.pformat(stream_data[key], indent=2),
    )
이런 글도 찾아보세요!
댓글 0
권한이 없습니다. 로그인