# Keep only the scraped titles that are not already in the `known` set.
new_titles = [t for t in raw_titles if t not in known]

# Bound unconditionally so that code running after this section can rely on
# `enriched` existing even when no new titles were found.
enriched = []

if not new_titles:
    logging.info("No new titles detected.")
else:
    logging.info(f"{len(new_titles)} new title(s) discovered!")
    for title in new_titles:
        try:
            # Use Posy to pull clean metadata
            result = posy.search(title)
            if result:
                movie = posy.enrich(result[0])
                enriched.append(vars(movie))
            else:
                logging.warning(f"Posy could not resolve: {title}")
        except Exception as e:
            # Best-effort per title: a failing lookup is logged and the loop
            # moves on to the next title instead of aborting the batch.
            logging.error(f"Error processing '{title}': {e}")
# Populate the process environment from a .env file (e.g., OMDB_API_KEY).
load_dotenv()

# OMDb API key; optional but recommended. None when the variable is unset.
OMDB_KEY = os.environ.get("OMDB_API_KEY")
# 3. (Optional) Add your OMDb API key to a .env file:
#      OMDB_API_KEY=your_key_here
# De-duplicate against a local cache: `known` holds one entry per line of
# cache.txt, or is empty when the file does not exist yet.
cache_file = Path("cache.txt")
if cache_file.exists():
    # Decode explicitly as UTF-8 so non-ASCII titles are read the same way on
    # every platform instead of depending on the locale's default encoding.
    # NOTE(review): whatever writes cache.txt should use UTF-8 as well.
    known = set(cache_file.read_text(encoding="utf-8").splitlines())
else:
    known = set()
# ----------------------------------------------------------------------
# Helper: fetch the Filmyzilla “latest releases” page
# ----------------------------------------------------------------------
# Site root and the path of the listing page, kept separate so the path can
# be changed without touching the host.
BASE_URL = "https://www.filmyzilla.com"
LATEST_PATH = "/new-movies"  # this path varies; adjust as needed
# Enrich the first result with full metadata movie = p.enrich(movies[0])
# Store results in a CSV for later analysis.
out_path = Path("filmyzilla_watch.csv")

# Skip the write when nothing was enriched: an empty frame has no columns, so
# appending it would create the file without a real header, and every later
# append would then skip the header because the file already exists.
if enriched:
    df = pd.DataFrame(enriched)
    # Write the header only when starting a fresh (missing or empty) file.
    needs_header = not out_path.exists() or out_path.stat().st_size == 0
    df.to_csv(out_path, mode="a", header=needs_header, index=False)
    logging.info(f"Appended {len(enriched)} rows to {out_path}")
# ----------------------------------------------------------------------
# Helper: parse movie titles from the HTML
# ----------------------------------------------------------------------
def parse_titles(html: str) -> list:
    """Extract movie titles from a listing page.

    Args:
        html: Raw HTML of the page to scan.

    Returns:
        A list with the whitespace-stripped text of every
        ``<a class="movie-title">`` element, in document order. The list is
        empty when no element matches the selector.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Filmyzilla typically lists titles inside <a class="movie-title"> tags.
    # Adjust the selector if the site changes.
    title_tags = soup.select("a.movie-title")
    titles = [tag.get_text(strip=True) for tag in title_tags]

    logging.info(f"Found {len(titles)} titles on the page.")
    return titles