Содержание: вступление, что будем парсить, код, ссылки, заключение (Outro).
вступление
Этот пост — продолжение предыдущего поста в блоге, где мы парсили результаты поиска YouTube: видео, рекламу и каналы. В этом посте рассказывается, как извлекать плейлисты, фильмы и категории из результатов поиска YouTube.
Что будем парсить
Результаты плейлистов
Результаты фильмов
Результаты категории
Код
Результаты плейлистов
import json

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


def get_video_paylist_results():
    """Scrape playlist results from a YouTube search page and print them as JSON.

    Opens headless Chrome on the hard-coded "dnb playlist" search, collects the
    title, link, video count and channel name of every playlist result, and
    prints the collected list as indented JSON. Returns nothing.
    """
    options = Options()
    # Run Chrome without a visible window. Passing the flag directly works on
    # both Selenium 3 and 4 (the `headless` property is deprecated in newer
    # Selenium 4 releases).
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)

    youtube_playlist = []
    try:
        driver.get('https://www.youtube.com/results?search_query=dnb playlist')

        # NOTE(review): the published snippet called find_elements_by_xpath()
        # with no locator, which raises TypeError. The container tag used here
        # is taken from the link selector below ('ytd-playlist-renderer');
        # confirm it against the live page.
        # find_element(By, value) is used instead of the find_element_by_*
        # helpers, which were removed in recent Selenium 4 releases.
        for result in driver.find_elements(By.CSS_SELECTOR, 'ytd-playlist-renderer'):
            playlist_title = result.find_element(By.CSS_SELECTOR, '#video-title').text
            playlist_link = result.find_element(
                By.CSS_SELECTOR, '.style-scope ytd-playlist-renderer a'
            ).get_attribute('href')
            channel_name = result.find_element(By.CSS_SELECTOR, '#channel-name').text
            video_count = result.find_element(
                By.CSS_SELECTOR,
                '#overlays > ytd-thumbnail-overlay-side-panel-renderer > yt-formatted-string',
            ).text

            youtube_playlist.append({
                'title': playlist_title,
                'link': playlist_link,
                'count': video_count,
                'channel': channel_name,
            })
    finally:
        # Always shut the browser down, even when a locator fails, so no
        # headless Chrome process is left running.
        driver.quit()

    print(json.dumps(youtube_playlist, indent=2, ensure_ascii=False))


get_video_paylist_results()

# part of the output:
'''
[
  {
    "title": "Drum & Bass Hits Playlist - Top 100 DnB Songs of 2021",
    "link": "https://www.youtube.com/watch?v=y3Ko9pP6XAY&list=PLMmqTuUsDkRIZ1C1T2AsVz5XIxtVDfSOe",
    "count": "100",
    "channel": "Redlist - World Hits"
  }
]
'''
Результаты фильмов
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# NOTE(review): every XPath argument was missing from the published snippet
# (find_elements_by_xpath() / find_element_by_xpath() were called with no
# locator, which raises TypeError). The expressions below are a best-effort
# reconstruction and are NOT verified -- TODO confirm each one against the
# live YouTube markup. They start with '.' so they are evaluated relative to
# the current result element rather than the whole document.
MOVIE_RESULT_XPATH = '//*[@id="contents"]/ytd-movie-renderer'
TITLE_XPATH = './/*[@id="video-title"]'
MOVIE_INFO_XPATH = '(.//ul)[1]'
DESCRIPTION_XPATH = './/*[@id="description-text"]'
ADDITIONAL_DESCRIPTION_XPATH = '(.//ul)[2]'
CHANNEL_XPATH = './/*[@id="text"]/a'


def get_movie_results():
    """Scrape movie results from a YouTube search page and print them.

    Opens headless Chrome on the hard-coded "mortal kombat 2021 movie" search
    and, for every movie result, prints title, link, movie info, description,
    additional description, channel name and channel link, one per line,
    followed by a blank line. Returns nothing.
    """
    options = Options()
    # Run Chrome without a visible window. Passing the flag directly works on
    # both Selenium 3 and 4 (the `headless` property is deprecated in newer
    # Selenium 4 releases).
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)

    try:
        driver.get('https://www.youtube.com/results?search_query=mortal kombat 2021 movie')

        # find_element(By, value) is used instead of the find_element_by_*
        # helpers, which were removed in recent Selenium 4 releases.
        for result in driver.find_elements(By.XPATH, MOVIE_RESULT_XPATH):
            title = result.find_element(By.XPATH, TITLE_XPATH).text
            link = result.find_element(By.XPATH, TITLE_XPATH).get_attribute('href')
            movie_info = result.find_element(By.XPATH, MOVIE_INFO_XPATH).text
            desc = result.find_element(By.XPATH, DESCRIPTION_XPATH).text
            additional_desc = result.find_element(By.XPATH, ADDITIONAL_DESCRIPTION_XPATH).text
            channel_name = result.find_element(By.XPATH, CHANNEL_XPATH).text
            channel_link = result.find_element(By.XPATH, CHANNEL_XPATH).get_attribute('href')

            print(f'{title}\n{link}\n{movie_info}\n{desc}\n{additional_desc}\n{channel_name}\n{channel_link}\n')
    finally:
        # Always shut the browser down, even when a locator fails, so no
        # headless Chrome process is left running.
        driver.quit()


get_movie_results()

# output:
'''
Mortal Kombat (2021)
Action & Adventure • 2021 • English audio (and 8 more)
MMA fighter Cole Young, accustomed to taking a beating for money, is unaware of his heritage—or why Outworld's Emperor ...
Actors: Lewis Tan, Jessica McNamee, Josh Lawson
Director: Simon McQuoid
YouTube Movies
https://www.youtube.com/channel/UClgRkhTL3_hImCAmdLfDE4g
'''
Результаты категории
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By


def get_category_results():
    """Scrape category ("latest from ...") results from a YouTube search page.

    Opens headless Chrome on the hard-coded "mojang" search and, for every
    vertical-list result, prints title, link, views, date posted, snippet,
    channel name, channel link and badges (``None`` when the result has no
    badges element), one per line, followed by a blank line. Returns nothing.
    """
    options = Options()
    # Run Chrome without a visible window. Passing the flag directly works on
    # both Selenium 3 and 4 (the `headless` property is deprecated in newer
    # Selenium 4 releases).
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)

    try:
        driver.get('https://www.youtube.com/results?search_query=mojang')

        # find_element(By, value) is used instead of the find_element_by_*
        # helpers, which were removed in recent Selenium 4 releases.
        for result in driver.find_elements(By.CSS_SELECTOR, '#contents > ytd-vertical-list-renderer'):
            title = result.find_element(By.CSS_SELECTOR, '#video-title > yt-formatted-string').text
            link = result.find_element(By.CSS_SELECTOR, '#video-title').get_attribute('href')
            views = result.find_element(By.CSS_SELECTOR, '#metadata-line > span:nth-child(1)').text
            date_posted = result.find_element(By.CSS_SELECTOR, '#metadata-line > span:nth-child(2)').text
            snippet = result.find_element(
                By.CSS_SELECTOR,
                '#dismissible > div > div.metadata-snippet-container.style-scope.ytd-video-renderer > yt-formatted-string',
            ).text
            channel_name = result.find_element(By.CSS_SELECTOR, '.long-byline').text
            channel_link = result.find_element(By.CSS_SELECTOR, '#text > a').get_attribute('href')

            # Badges are optional. Catch only the "element not found" case so
            # that unrelated failures (and Ctrl+C) are not silently swallowed.
            try:
                badges = result.find_element(By.CSS_SELECTOR, '#badges').text
            except NoSuchElementException:
                badges = None

            print(f'{title}\n{link}\n{views}\n{date_posted}\n{snippet}\n{channel_name}\n{channel_link}\n{badges}\n')
    finally:
        # Always shut the browser down, even when a locator fails, so no
        # headless Chrome process is left running.
        driver.quit()


get_category_results()

# part of the output:
'''
Ask Mojang #18: More Mobs!
574K views
2 days ago
Matthew, Anna, and Thommy answer your questions about the glorious dirt block. Just kidding, they're here to talk about mobs!
Minecraft
https://www.youtube.com/user/TeamMojang
New CC
'''
Использование API результатов плейлиста YouTube
from serpapi import GoogleSearch


def get_video_paylist_results():
    """Fetch playlist results for "dnb playlist" via SerpApi and print them.

    Sends one request to the ``youtube`` engine and, for every entry under
    ``playlist_results``, prints its title, link, video count and list of
    videos, one value per line. Returns nothing.
    """
    search = GoogleSearch({
        "api_key": "YOUR_API_KEY",
        "engine": "youtube",
        "search_query": "dnb playlist",
    })
    response = search.get_dict()

    for playlist in response['playlist_results']:
        # One value per line: title, link, video count, videos.
        print(
            playlist['title'],
            playlist['link'],
            playlist['video_count'],
            playlist['videos'],
            sep='\n',
        )


get_video_paylist_results()

# part of the output:
'''
Drum & Bass Hits Playlist - Top 100 DnB Songs of 2021
100
[{'title': 'Bru-C x Bou - Streetside [Music Video]', 'link': 'https://www.youtube.com/watch?v=y3Ko9pP6XAY&list=PLMmqTuUsDkRIZ1C1T2AsVz5XIxtVDfSOe', 'length': '3:40'}, {'title': 'Wiguez & Vizzen - Love Me Better [NCS Release]', 'link': 'https://www.youtube.com/watch?v=RrmL-R-2f28&list=PLMmqTuUsDkRIZ1C1T2AsVz5XIxtVDfSOe', 'length': '2:53'}]
Best of Drum & Bass/Liquid DnB Playlist
296
[{'title': 'Maduk ft Veela - Ghost Assassin', 'link': 'https://www.youtube.com/watch?v=tEcggRukZCs&list=PL92k2xCT1v3l7mrk_NEndTvOMmK_621J9', 'length': '3:42'}, {'title': 'Meiko - Leave The Lights On (Krot Remix)', 'link': 'https://www.youtube.com/watch?v=uO7kCUjUaUE&list=PL92k2xCT1v3l7mrk_NEndTvOMmK_621J9', 'length': '6:46'}]
'''
Использование API результатов фильмов YouTube
from serpapi import GoogleSearch


def get_movie_results():
    """Fetch movie results for "mortal kombat 2021 movie" via SerpApi and print them.

    Sends one request to the ``youtube`` engine and, for every entry under
    ``movie_results``, prints its title, link, channel, description and info,
    one value per line, followed by a blank line. Returns nothing.
    """
    search = GoogleSearch({
        "api_key": "YOUR_API_KEY",
        "engine": "youtube",
        "search_query": "mortal kombat 2021 movie",
    })
    response = search.get_dict()

    for movie in response['movie_results']:
        # end='\n\n' leaves one empty line between consecutive movies.
        print(
            movie['title'],
            movie['link'],
            movie['channel'],
            movie['description'],
            movie['info'],
            sep='\n',
            end='\n\n',
        )


get_movie_results()

# output:
'''
Mortal Kombat (2021)
{'name': 'YouTube Movies', 'link': 'https://www.youtube.com/channel/UClgRkhTL3_hImCAmdLfDE4g', 'verified': True}
['Action & Adventure • 2021 • R • English audio (and 8 more)', 'Actors: Lewis Tan, Jessica McNamee, Josh Lawson', 'Director: Simon McQuoid']

Mortal Kombat Legends: Scorpion's Revenge
{'name': 'YouTube Movies', 'link': 'https://www.youtube.com/channel/UClgRkhTL3_hImCAmdLfDE4g', 'verified': True}
['Animation • 2020 • R • English audio (and 4 more)', 'Actor: Darren De Paul', 'Director: Ethan Spaulding']
'''
Использование API результатов категорий YouTube
from serpapi import GoogleSearch


def get_category_results():
    """Fetch category results for the "mojang" search via SerpApi and print them.

    Sends one request to the ``youtube`` engine and, for every entry under
    ``latest_from_minecraft``, prints its title, link, channel, published
    date, views and extensions, one value per line, followed by a blank line.
    Returns nothing.
    """
    params = {
        # Placeholder spelled the same way as in the other SerpApi snippets
        # (it was misspelled "YOR_API_KEY"), so one search-and-replace of
        # the key covers every example.
        "api_key": "YOUR_API_KEY",
        "engine": "youtube",
        "search_query": "mojang",
    }

    search = GoogleSearch(params)
    results = search.get_dict()

    # NOTE(review): the section key looks specific to this query (presumably
    # named after the channel returned for "mojang") -- confirm against the
    # response when changing the search query.
    for result in results['latest_from_minecraft']:
        title = result['title']
        link = result['link']
        channel = result['channel']
        published_date = result['published_date']
        views = result['views']
        extensions = result['extensions']

        print(f'{title}\n{link}\n{channel}\n{published_date}\n{views}\n{extensions}\n')


get_category_results()

# part of the output:
'''
Ask Mojang #18: More Mobs!
{'name': 'Minecraft', 'link': 'https://www.youtube.com/user/TeamMojang', 'verified': True, 'thumbnail': 'https://yt3.ggpht.com/KYt9rfP_fcswzs2RzvossPvKHOcP7W2gWFylRpAskW7IadpfgUgUrhttiYGtLs-P-LufgXpuc9E=s68-c-k-c0x00ffffff-no-rj'}
2 days ago
574573
['New', 'CC']
'''
Ссылки
Код в онлайн-IDE (примечание: иногда Replit выдаёт ошибку при использовании Selenium,
даже после добавления нескольких аргументов для запуска внутри Replit. Если это произойдёт, запустите код локально).
API результатов поисковой системы YouTube
Outro.
Если у вас есть какие-либо вопросы, что-то работает неправильно или вы хотите написать что-то ещё, не стесняйтесь оставить комментарий в разделе комментариев или написать в Twitter: @serp_api.
Ваш Дмитрий и остальная команда SerpApi.
Оригинал: “https://dev.to/dimitryzub/scrape-youtube-search-with-python-part-2-29jc”