로그인

  • 목록
  • 아래로
  • 위로
  • 쓰기
  • 검색

정보&강의 파이썬으로 만든 Youtube 다운로드 프로그램

첨부 3

  1. python_youtube_download.png (File Size: 33.3KB/Download: 14)
  2. 이미지 3.png (File Size: 799.8KB/Download: 6)
  3. youtube_download.zip (File Size: 4.79MB/Download: 383)

# -*- coding: utf-8 -*-
"""Interactive YouTube downloader built on pytube.

Asks for a video URL, lists the streams pytube reports for it, asks which
stream index to fetch and saves that stream into ``./download``.
"""

import os
import subprocess
import sys

import pytube

# Directory that receives the downloaded file; created on first run.
parent_dir = "./download"
os.makedirs(parent_dir, exist_ok=True)

url = input("다운받을 유튜브 링크를 입력하세요. : ")
yt = pytube.YouTube(url)

vids = yt.streams.all()

# Show every available stream format next to the index the user must type.
for i, vid in enumerate(vids):
    print(i, '. ', vid)

try:
    # The integer conversion lives inside the ``try`` so that the ValueError
    # handler below can actually fire on non-numeric input.
    vnum = int(input("다운 받을 화질은? "))
    vids[vnum].download(parent_dir)  # perform the download
except OSError as err:
    print("OS error: {0}".format(err))
except ValueError:
    print("Could not convert data to an integer.")
except Exception:
    # ``sys`` must be imported for this report; the error is then re-raised.
    print("Unexpected error:", sys.exc_info()[0])
    raise
else:
    # Only announce success when the download really went through.
    print('동영상 다운로드 완료!')

 

유튜브 다운로드 프로그램 소스입니다.

실행파일은 첨부파일로 다운로드 받으시면 됩니다.

 

python_youtube_download.png

 

이미지 3.png

 

 

403 오류가 발생하면, 아래와 같이 pytube의 cipher.py와 mixins.py 파일을 수정한다.

 

# -*- coding: utf-8 -*-
"""
This module contains all logic necessary to decipher the signature.

YouTube's strategy to restrict downloading videos is to send a ciphered version
of the signature to the client, along with the decryption algorithm obfuscated
in JavaScript. For the clients to play the videos, JavaScript must take the
ciphered version, cycle it through a series of "transform functions," and then
sign the media URL with the output.

This module is responsible for (1) finding and extracting those "transform
functions", (2) mapping them to Python equivalents and (3) taking the ciphered
signature and decoding it.

"""
from __future__ import absolute_import

import logging
import pprint
import re
from itertools import chain

from pytube.exceptions import RegexMatchError
from pytube.helpers import regex_search


logger = logging.getLogger(__name__)


def get_initial_function_name(js):
    """Extract the name of the function responsible for computing the signature.

    A list of candidate regular expressions is handed to ``regex_search``
    together with ``js``. Every candidate captures the function name in a
    group named ``sig``.

    :param str js:
        The contents of the base.js asset file.
    :returns:
        The value ``regex_search`` produces for ``group=1``; presumably the
        obfuscated JavaScript function name (callers pass it to
        ``re.escape``).

    """
    # Example of the kind of statement being matched:
    # c&&d.set("signature", EE(c));
    # NOTE(review): the lookup below asks for group 1, but in the
    # ``(["\'])signature\1`` candidate group 1 is the quote character rather
    # than ``sig`` -- confirm how ``regex_search`` treats that candidate.
    pattern = [
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
        # This entry was cut off mid-group (``(?P<si$``), which is not a valid
        # regular expression; the capture group and trailing ``\(`` are
        # restored here.
        r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
        # The original list repeated this last entry twice verbatim; one copy
        # is enough.
        r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
    ]
    logger.debug('finding initial function name')
    return regex_search(pattern, js, group=1)


def get_transform_plan(js):
    """Return the ordered list of transform calls applied to a signature.

    The name of the signature function is looked up first; the body of that
    function is then located in ``js`` and its middle statements are split
    on ``;``.

    :param str js:
        The contents of the base.js asset file.
    :returns:
        The captured statements, one string per transform call.

    **Example**:

    >>> get_transform_plan(js)
    ['DE.AJ(a,15)',
    'DE.VR(a,3)',
    'DE.AJ(a,51)',
    'DE.VR(a,3)',
    'DE.kT(a,51)',
    'DE.kT(a,8)',
    'DE.VR(a,3)',
    'DE.kT(a,21)']
    """
    initial_name = get_initial_function_name(js)
    escaped_name = re.escape(initial_name)
    # Group 1 captures everything between the first and the last statement
    # of the function body.
    plan_regex = (
        r'%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}' % escaped_name
    )
    logger.debug('getting transform plan')
    plan_body = regex_search(plan_regex, js, group=1)
    return plan_body.split(';')


def get_transform_object(js, var):
    """Return the member definitions of the "transform object".

    The transform object is the JavaScript object literal assigned to
    ``var``; it holds the functions that the transform plan refers to. For a
    plan entry such as ``DE.AJ(a,15)`` the ``var`` would be ``"DE"``.

    :param str js:
        The contents of the base.js asset file.
    :param str var:
        The obfuscated variable name that stores an object with all functions
        that descrambles the signature.
    :returns:
        The object body with newlines turned into spaces, split on ``', '``.

    **Example**:

    >>> get_transform_object(js, 'DE')
    ['AJ:function(a){a.reverse()}',
    'VR:function(a,b){a.splice(0,b)}',
    'kT:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}']

    """
    escaped_var = re.escape(var)
    object_regex = r'var %s={(.*?)};' % escaped_var
    logger.debug('getting transform object')
    # DOTALL lets the lazy group span the line breaks inside the literal.
    object_body = regex_search(object_regex, js, group=1, flags=re.DOTALL)
    single_line = object_body.replace('\n', ' ')
    return single_line.split(', ')


def get_transform_map(js, var):
    """Map obfuscated JavaScript function names to Python callables.

    Each member of the transform object is split at its first ``:`` into a
    name and a function source, and the source is translated by
    ``map_functions``.

    :param str js:
        The contents of the base.js asset file.
    :param str var:
        The obfuscated variable name that stores an object with all functions
        that descrambles the signature.
    :returns:
        ``dict`` keyed by JavaScript member name.

    """
    # 'AJ:function(a){a.reverse()}' => ['AJ', 'function(a){a.reverse()}']
    name_and_source = (
        member.split(':', 1) for member in get_transform_object(js, var)
    )
    return {
        js_name: map_functions(js_source)
        for js_name, js_source in name_and_source
    }


def reverse(arr, b=None):
    """Reverse elements in a list.

    This function is equivalent to:

    .. code-block:: javascript

       function(a, b) { a.reverse() }

    :param list arr:
        The sequence to reverse; it is not modified.
    :param b:
        Ignored. It exists because every transform function is called with
        two arguments; it defaults to ``None`` so the function can also be
        called with the sequence alone.
    :returns:
        A reversed copy of ``arr``.

    **Example**:

    >>> reverse([1, 2, 3, 4])
    [4, 3, 2, 1]
    """
    return arr[::-1]


def splice(arr, b):
    """Drop the first ``b`` items of a list.

    This function is equivalent to:

    .. code-block:: javascript

       function(a, b) { a.splice(0, b) }

    In JavaScript ``a.splice(0, b)`` removes the first ``b`` elements from
    ``a`` in place, so what remains of ``a`` is everything from index ``b``
    onward. That remainder is what this function returns.

    :param list arr:
        The sequence to trim; it is not modified.
    :param int b:
        Number of leading items to remove.
    :returns:
        A new sequence holding ``arr`` without its first ``b`` items.

    **Example**:

    >>> splice([1, 2, 3, 4], 2)
    [3, 4]
    """
    return arr[b:]


def swap(arr, b):
    """Swap the first item with the item at ``b`` modulus the list length.

    This function is equivalent to:

    .. code-block:: javascript

       function(a, b) { var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c }

    :param list arr:
        The sequence to operate on; it is not modified.
    :param int b:
        Position to swap with index 0, taken modulo ``len(arr)``.
    :returns:
        A new ``list`` with the two positions exchanged. When
        ``b % len(arr)`` is 0 the result is an unchanged copy (the item is
        swapped with itself) rather than a list with a duplicated first item.

    **Example**:

    >>> swap([1, 2, 3, 4], 2)
    [3, 2, 1, 4]
    """
    r = b % len(arr)
    swapped = list(arr)
    swapped[0], swapped[r] = swapped[r], swapped[0]
    return swapped


def map_functions(js_func):
    """For a given JavaScript transform function, return the Python equivalent.

    The source text is tested against a fixed table of patterns in order and
    the callable paired with the first matching pattern is returned.

    :param str js_func:
        The JavaScript version of the transform function.
    :returns:
        One of ``reverse``, ``splice`` or ``swap``.
    :raises RegexMatchError:
        If no pattern in the table matches ``js_func``.

    """
    # Raw strings keep the regex escapes (``\w``, ``\(``, ``\[`` ...) from
    # being parsed as (invalid) Python string escapes; the resulting pattern
    # text is unchanged.
    mapper = (
        # function(a){a.reverse()}
        (r'{\w\.reverse\(\)}', reverse),
        # function(a,b){a.splice(0,b)}
        (r'{\w\.splice\(0,\w\)}', splice),
        # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}
        (r'{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\]=\w}', swap),
        # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c}
        (
            r'{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];'
            r'\w\[\w\%\w.length\]=\w}', swap,
        ),
    )

    for pattern, fn in mapper:
        if re.search(pattern, js_func):
            return fn
    raise RegexMatchError(
        'could not find python equivalent function for: ',
        js_func,
    )


def parse_function(js_func):
    """Split one transform call into its function name and argument.

    The call is expected to look like ``<var>.<name>(<letter>,<digits>)``;
    the member name and the digits are the two captured groups.

    :param str js_func:
        The JavaScript version of the transform function.
    :rtype: tuple
    :returns:
        two element tuple containing the function name and an argument, as
        returned by ``regex_search`` with ``groups=True``.

    **Example**:

    >>> parse_function('DE.AJ(a,15)')
    ('AJ', 15)

    """
    logger.debug('parsing transform function')
    call_regex = r'\w+\.(\w+)\(\w,(\d+)\)'
    return regex_search(call_regex, js_func, groups=True)


def get_signature(js, ciphered_signature):
    """Decipher the signature.

    Reads the transform plan out of ``js``, builds the lookup of transform
    functions, then feeds the signature characters through every step of the
    plan in order.

    :param str js:
        The contents of the base.js asset file.
    :param str ciphered_signature:
        The ciphered signature sent in the ``player_config``.
    :rtype: str
    :returns:
       Decrypted signature required to download the media content.

    """
    transform_plan = get_transform_plan(js)
    # 'DE.AJ(a,15)' => 'DE' is the variable holding the transform object
    var_name, _ = transform_plan[0].split('.')
    transform_map = get_transform_map(js, var_name)
    chars = list(ciphered_signature)

    for step in transform_plan:
        fn_name, raw_argument = parse_function(step)
        transform = transform_map[fn_name]
        argument = int(raw_argument)
        chars = transform(chars, argument)
        step_report = {
            'output': ''.join(chars),
            'js_function': fn_name,
            'argument': argument,
            'function': transform,
        }
        logger.debug(
            'applied transform function\n%s',
            pprint.pformat(step_report, indent=2),
        )
    return ''.join(chars)

 

 

# -*- coding: utf-8 -*-
"""Applies in-place data mutations."""
from __future__ import absolute_import

import json
import logging
import pprint

from pytube import cipher
from pytube.compat import parse_qsl
from pytube.compat import unquote
from pytube.exceptions import LiveStreamError


logger = logging.getLogger(__name__)


def apply_signature(config_args, fmt, js):
    """Apply the decrypted signature to the stream manifest.

    Every stream whose URL is not already signed gets ``&sig=<signature>``
    appended to its ``url`` entry in place.

    :param dict config_args:
        Details of the media streams available.
    :param str fmt:
        Key in stream manifests (``ytplayer_config``) containing progressive
        download or adaptive streams (e.g.: ``url_encoded_fmt_stream_map`` or
        ``adaptive_fmts``).
    :param str js:
        The contents of the base.js asset file.
    :raises LiveStreamError:
        If a stream has no ``url`` and the player response reports
        ``liveStreamability``.
    :raises TypeError:
        If a signature has to be descrambled but ``js`` is ``None``.

    """
    stream_manifest = config_args[fmt]
    live_stream = json.loads(config_args['player_response']).get(
        'playabilityStatus', {},
    ).get('liveStreamability')
    for i, stream in enumerate(stream_manifest):
        if 'url' not in stream:
            if live_stream:
                raise LiveStreamError('Video is currently being streamed live')
            # Without this guard ``url`` would be unbound on the first
            # iteration or, worse, silently carry over the previous stream's
            # URL and be written onto this stream.
            logger.warning('stream at index %s has no url, skipping', i)
            continue
        url = stream['url']

        if ('signature=' in url or
                ('s' not in stream and
                 ('&sig=' in url or '&lsig=' in url))):
            # For certain videos, YouTube will just provide them pre-signed, in
            # which case there's no real magic to download them and we can skip
            # the whole signature descrambling entirely.
            logger.debug('signature found, skip decipher')
            continue

        if js is not None:
            signature = cipher.get_signature(js, stream['s'])
        else:
            # signature not present in url (checked above), need js to
            # descramble
            # TypeError caught in __main__
            raise TypeError('JS is None')

        logger.debug(
            'finished descrambling signature for itag=%s\n%s',
            stream['itag'], pprint.pformat(
                {
                    's': stream['s'],
                    'signature': signature,
                }, indent=2,
            ),
        )
        stream_manifest[i]['url'] = url + '&sig=' + signature


def apply_descrambler(stream_data, key):
    """Decode a comma-separated, query-string encoded entry in place.

    The value stored under ``key`` is split on commas; each piece is parsed
    as a query string and turned into a ``dict`` whose values are unquoted.
    The resulting ``list`` of dictionaries replaces the original value.

    :param dict stream_data:
        Dictionary containing query string encoded values.
    :param str key:
        Name of the key in dictionary.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    """
    decoded_streams = []
    for encoded_stream in stream_data[key].split(','):
        fields = {}
        for name, value in parse_qsl(encoded_stream):
            fields[name] = unquote(value)
        decoded_streams.append(fields)
    stream_data[key] = decoded_streams
    logger.debug(
        'applying descrambler\n%s',
        pprint.pformat(decoded_streams, indent=2),
    )

 


이런 글도 찾아보세요!

공유

facebooktwitterpinterestbandkakao story
퍼머링크

댓글 0

권한이 없습니다. 로그인

신고

"님의 댓글"

이 댓글을 신고 하시겠습니까?

삭제

"님의 댓글"

이 댓글을 삭제하시겠습니까?