#!/usr/bin/env python3
"""
weather_scraper.py
This short script requests the text from a webpage that reports the
weather, then extracts ("scrapes") the information that we're interested
in from that page and prints it out.

Web scrapers can be finicky to manage--they rely on the webpage having a
consistent format that they can look at--and there are better tools for
analyzing text on a page, including Python's re tool (for looking at
regular expressions) and the Beautiful Soup package. This simple script
just uses plain string searching to find what we're looking for on a
relatively simple page, as a proof of concept.
"""

__author__ = "Richard White"
__version__ = "2021-03-13"

import urllib.request   # Used to read webpage html

# The webpage we'll be reading from.
FORECAST_URL = (
    "https://forecast.weather.gov/MapClick.php?lat=34.1475&lon=-118.1443"
)

# Opening tag that immediately precedes the current temperature.
# NOTE(review): the marker literals in the original file were lost; this
# one is reconstructed from the surviving "+ 34" offset (the tag is exactly
# 34 characters long). Confirm it against the live page with "View Source".
TEMPERATURE_TAG = '<p class="myforecast-current-lrg">'

# Text that immediately follows the temperature. Both the HTML entity and
# the literal degree sign (U+00B0) are accepted, because it is not certain
# which form the page serves.
DEGREE_MARKERS = ("&deg;F", "\u00b0F")

# Give up rather than hang forever if the server never answers.
REQUEST_TIMEOUT_SECONDS = 30


def extract_temperature(page_text: str) -> str:
    """Return the current temperature found in the page's HTML.

    The temperature is the text between TEMPERATURE_TAG and the first
    following degree marker (see DEGREE_MARKERS), with surrounding
    whitespace removed. It is returned as a string, e.g. "57".

    Raises:
        ValueError: if the tag or a degree marker after it cannot be
            found, which usually means the page layout has changed.
    """
    tag_at = page_text.find(TEMPERATURE_TAG)
    if tag_at == -1:
        raise ValueError(
            "Could not find the current-temperature tag; "
            "the page format may have changed."
        )
    value_start = tag_at + len(TEMPERATURE_TAG)

    # str.find returns -1 on a miss, so filter those out before picking
    # whichever marker comes first after the tag.
    candidates = (
        page_text.find(marker, value_start) for marker in DEGREE_MARKERS
    )
    marker_positions = [position for position in candidates if position != -1]
    if not marker_positions:
        raise ValueError(
            "Could not find a degrees-Fahrenheit marker after the "
            "temperature tag; the page format may have changed."
        )
    return page_text[value_start:min(marker_positions)].strip()


def _fetch_page(url: str) -> str:
    """Download url and return its entire body as text."""
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(
        url, timeout=REQUEST_TIMEOUT_SECONDS
    ) as response:
        # Decode with the charset the server declares, not the local
        # machine's default encoding; fall back to UTF-8 if none is given.
        charset = response.headers.get_content_charset() or "utf-8"
        # (We could read this in line by line, but we're going to end up
        # parsing through the whole thing anyway, so this is okay.)
        return response.read().decode(charset, errors="replace")


def main():
    """Get the current temperature based on NOAA data

    This program downloads the NOAA forecast page at FORECAST_URL, and
    then parses that data to find the current temperature, which it
    prints out.

    Programs that attempt to parse HTML data are notoriously hacky: they
    rely on a more-or-less consistent format in that page. If the HTML
    formatting changes, this script has to be rewritten.

    NOTE(review): earlier notes described this as BUR airport's weather,
    while the printed message says Pasadena -- confirm which is intended.

    Raises:
        urllib.error.URLError: if the page cannot be downloaded.
        ValueError: if the temperature cannot be located in the page.
    """
    # Read in the entire contents of the page.
    complete_file_content = _fetch_page(FORECAST_URL)

    # Look for the markup that has the current conditions listed. You can
    # find it either by using a browser to "View Source" for the page, or
    # by looking through the value of complete_file_content after this
    # program reads it in.
    temp = extract_temperature(complete_file_content)

    # Print out the characters with the current temperature
    print(f"The temperature in Pasadena is currently {temp} degrees Fahrenheit.")


if __name__ == "__main__":
    main()