Skip to content

Improved readability of web_programming/get_imdbtop.py and added documentations with doctests #4855

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Oct 16, 2021
57 changes: 45 additions & 12 deletions web_programming/get_imdbtop.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,53 @@
import bs4
import requests
from bs4 import BeautifulSoup


def imdb_top(imdb_top_n):
def get_movie_data_from_soup(soup: bs4.element.Tag) -> dict[str, str]:
    """Extract one movie's details from a single search-result element.

    Args:
        soup: The parsed element for ONE movie entry (an item yielded by
            iterating a ``find_all`` result), not the whole result set.

    Returns:
        A dict with the keys ``name``, ``genre``, ``rating`` and
        ``page_link``.

    Raises:
        AttributeError: If the element lacks any of the expected child
            nodes (title link, genre span, or ``strong`` rating node).
    """
    return {
        "name": soup.h3.a.text,
        # strip() drops the whitespace surrounding the genre text.
        "genre": soup.find("span", class_="genre").text.strip(),
        "rating": soup.strong.text,
        # The href is prefixed with the host to form an absolute URL
        # (presumably the markup carries site-relative links).
        "page_link": f"https://www.imdb.com{soup.a.get('href')}",
    }


def get_imdb_top_movies(num_movies: int = 5) -> tuple:
    """Get the num_movies most-voted feature films from IMDB and
    return a tuple of dicts describing each movie's name, genre, rating, and URL.

    The search URL sorts by number of votes (descending), so "top" here means
    most-voted rather than highest-rated.

    Args:
        num_movies: The number of movies to get. Defaults to 5. Anything
            accepted by ``float()`` works; the value is truncated to an int.

    Returns:
        A tuple of dicts, one per movie, as built by
        ``get_movie_data_from_soup``. Empty when ``num_movies`` is below 1.

    NOTE: every doctest below except the negative-count one performs a live
    HTTP request.

    >>> len(get_imdb_top_movies(5))
    5
    >>> len(get_imdb_top_movies(-3))
    0
    >>> len(get_imdb_top_movies(4.99999))
    4
    """
    # Going through float() first lets inputs such as 4.99999 or "3.0"
    # truncate to an int instead of raising.
    num_movies = int(float(num_movies))
    if num_movies < 1:
        # Nothing to fetch; skip the network round-trip entirely.
        return ()
    base_url = (
        "https://www.imdb.com/search/title?title_type="
        f"feature&sort=num_votes,desc&count={num_movies}"
    )
    # A timeout keeps the call from blocking forever on a stalled connection.
    response = requests.get(base_url, timeout=10)
    source = bs4.BeautifulSoup(response.content, "html.parser")
    return tuple(
        get_movie_data_from_soup(movie)
        for movie in source.find_all("div", class_="lister-item mode-advanced")
    )


if __name__ == "__main__":
    # json is only needed for pretty-printing when run as a script.
    import json

    # Prompt once for the count, then print each movie dict as indented JSON,
    # with the per-movie JSON documents separated by ", ".
    num_movies = int(input("How many movies would you like to see? "))
    print(
        ", ".join(
            json.dumps(movie, indent=4) for movie in get_imdb_top_movies(num_movies)
        )
    )