Scrapur/search.py

32 lines
1.2 KiB
Python

import re
from urllib.parse import quote

import requests
from lxml import html, etree
def search_site(search_string: str) -> list:
    """Search prehrajto.cz and return the videos found on the results page.

    Args:
        search_string: Free-text query. It is percent-encoded before being
            inserted into the URL path.

    Returns:
        A list with one dict per result link, each holding:

        * ``'title'`` - the link's ``title`` attribute with ``+``, ``-`` and
          ``.`` replaced by spaces (empty string if the attribute is absent);
        * ``'href'`` - the link's ``href`` attribute;
        * ``'thumbnail'`` - ``src`` of the first ``thumb thumb1`` image inside
          the link, or ``None`` if the link contains no such image.

        The list is empty when no result links are found.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # safe="" escapes every reserved character: a space still becomes %20,
    # but "/", "?", "#" and "%" can no longer alter the URL structure.
    query = quote(search_string, safe="")
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"
    }
    # The timeout keeps a stalled server from blocking the caller forever.
    response = requests.get(
        f"https://prehrajto.cz/hledej/{query}",
        headers=headers,
        timeout=10,
    )
    tree = html.fromstring(response.content)
    links = tree.xpath('.//a[contains(@class, "video video--small video--link")]')
    if not links:
        # Existing diagnostic output, kept unchanged.
        print("EMPTYYY")

    # One translation pass replaces "+", "-" and "." with spaces.
    separators = str.maketrans("+-.", "   ")
    results = []
    for link in links:
        # A link without a title attribute yields "" instead of crashing.
        title = (link.get("title") or "").translate(separators)
        href = link.get("href")
        thumbs = link.xpath(
            './/img[contains(concat(" ", normalize-space(@class), " "), " thumb thumb1 ")]'
        )
        # A link without a thumbnail image yields None instead of IndexError.
        thumbnail = thumbs[0].get("src") if thumbs else None
        results.append({
            'title': title,
            'href': href,
            'thumbnail': thumbnail,
        })
    return results
def get_video_url(href: str) -> str:
    """Fetch a prehrajto.cz video page and extract the storage URL from it.

    Args:
        href: Path of the video page relative to the site root, with or
            without leading slashes.

    Returns:
        The first substring of the page source that starts with ``https:``,
        has ``storage`` within the following six characters, and extends up
        to (not including) the next double quote.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        AttributeError: If the page contains no matching URL. The type is
            kept for backward compatibility: the original code failed with
            ``AttributeError`` in this situation.
    """
    # Strip leading slashes so the path does not produce "prehrajto.cz//...".
    path = href.lstrip("/")
    # The timeout keeps a stalled server from blocking the caller forever.
    response = requests.get(f"https://prehrajto.cz/{path}", timeout=10)
    # Only an ASCII URL is extracted, so undecodable bytes elsewhere in the
    # page are replaced rather than aborting the whole lookup.
    page = response.content.decode("utf-8", errors="replace")
    match = re.search(r'(https:.{0,6}?storage.+?)"', page)
    if match is None:
        raise AttributeError(
            f"no storage URL found on video page for href {href!r}"
        )
    return match.group(1)