diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py index 7b9f4536a..aa5d9820b 100644 --- a/youtube_dl/extractor/cnbc.py +++ b/youtube_dl/extractor/cnbc.py @@ -2,23 +2,29 @@ from __future__ import unicode_literals import re +import datetime +import calendar +import json from .common import InfoExtractor -from ..utils import smuggle_url +from ..utils import js_to_json, int_or_none class CNBCIE(InfoExtractor): - _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P/video/(?:[^/]+/)+(?P[^./?#&]+)\.html)' _TEST = { - 'url': 'http://video.cnbc.com/gallery/?video=3000503714', + 'url': 'https://www.cnbc.com/video/2020/07/06/gary-shilling-why-the-stock-market-could-be-set-for-a-big-decline.html', 'info_dict': { - 'id': '3000503714', + 'id': 'gary-shilling-why-the-stock-market-could-be-set-for-a-big-decline', 'ext': 'mp4', - 'title': 'Fighting zombies is big business', - 'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e', - 'timestamp': 1459332000, - 'upload_date': '20160330', + 'title': 'Gary Shilling: Why the stock market could be set for a big decline', + 'alt_title': 'Why the stock market could be set for a big decline, according to financial analyst Gary Shilling', + 'description': 'Financial analyst Gary Shilling says the stock market could be set for a big pullback similar to the decline in the 1930s during the Great Depression. He explains how the coronavirus pandemic will result in long-term changes in the economy.', + 'thumbnail': 'https://image.cnbcfm.com/api/v1/image/106592286-gettyimages-103220190.jpg?v=1594046174', 'uploader': 'NBCU-CNBC', + 'timestamp': 1594046033, + 'upload_date': '20200706', + 'duration': 658 }, 'params': { # m3u8 download @@ -27,45 +33,110 @@ class CNBCIE(InfoExtractor): } def _real_extract(self, url): - video_id = self._match_id(url) + path, video_id = re.match(self._VALID_URL, url).groups() + video_webpage = self._download_webpage( + url, + video_id, + note='Downloading webpage to get video info' + ) + video_info_js = self._search_regex( + r'window.__s_data=(.*); window.__c_data=', + video_webpage, + 's_data', + ) + video_info = json.loads(js_to_json(video_info_js)) + core_data = None + for layout in video_info['page']['page']['layout']: + for column in layout['columns']: + data = column['modules'][0]['data'] + if 'playbackURL' not in data: + continue + else: + core_data = data + break + if core_data: + break + formats = [] + m3u8_url = core_data['playbackURL'].replace('\u002F', '/') + for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'): + mobj = re.search(r'(?P(?:-p|-b)).m3u8', entry['url']) + if mobj: + entry['format_id'] += mobj.group('tag') + formats.append(entry) + self._sort_formats(formats) return { - '_type': 'url_transparent', - 'ie_key': 'ThePlatform', - 'url': smuggle_url( - 'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id, - {'force_smil_url': True}), 'id': video_id, + 'title': core_data['seoTitle'], + 'alt_title': core_data['title'], + 'description': core_data['description'], + 'formats': formats, + 'thumbnail': core_data['thumbnail'].replace('\u002F', '/'), + 'uploader': 'NBCU-CNBC', + 'timestamp': calendar.timegm(datetime.datetime.strptime( + core_data['dateFirstPublished'], + "%Y-%m-%dT%H:%M:%S+0000" + ).timetuple()), + 'upload_date': core_data['dateFirstPublished'][:10].replace('-', ''), + 'duration': int_or_none(core_data['duration']) } -class CNBCVideoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P/video/(?:[^/]+/)+(?P[^./?#&]+)\.html)' +class CNBCPlayerIE(InfoExtractor): + _VALID_URL = r'https?://player.cnbc.com/p/gZWlPC/cnbc_global\?playertype=synd&byGuid=(?P[0-9]+)&?.*' _TEST = { - 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', + 'url': 'https://player.cnbc.com/p/gZWlPC/cnbc_global?playertype=synd&byGuid=7000142698', 'info_dict': { - 'id': '7000031301', + 'id': '7000142698', 'ext': 'mp4', - 'title': "Trump: I don't necessarily agree with raising rates", - 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', - 'timestamp': 1531958400, - 'upload_date': '20180719', + 'title': 'Gary Shilling: Why the stock market could be set for a big decline', + 'alt_title': 'Why the stock market could be set for a big decline, according to financial analyst Gary Shilling', + 'description': 'Financial analyst Gary Shilling says the stock market could be set for a big pullback similar to the decline in the 1930s during the Great Depression. He explains how the coronavirus pandemic will result in long-term changes in the economy.', + 'thumbnail': 'https://image.cnbcfm.com/api/v1/image/106592286-gettyimages-103220190.jpg?v=1594046174', 'uploader': 'NBCU-CNBC', + 'timestamp': 1594046033, + 'upload_date': '20200706', + 'duration': 658 }, 'params': { + # m3u8 download 'skip_download': True, }, } def _real_extract(self, url): - path, display_id = re.match(self._VALID_URL, url).groups() - video_id = self._download_json( - 'https://webql-redesign.cnbcfm.com/graphql', display_id, query={ - 'query': '''{ - page(path: "%s") { - vcpsId - } -}''' % path, - })['data']['page']['vcpsId'] - return self.url_result( - 'http://video.cnbc.com/gallery/?video=%d' % video_id, - CNBCIE.ie_key()) + video_id, = re.match(self._VALID_URL, url).groups() + url = 'https://player.cnbc.com/p/gZWlPC/cnbc_global?playertype=synd&byGuid=%s' % video_id + video_webpage = self._download_webpage( + url, + video_id, + note='Downloading webpage to get video info' + ) + video_info = self._search_json( + '