Problem scenario
You want to download pictures (e.g., .png, .jpeg etc.) from websites. How do you use Python to download such files?
Solution
Use this program with a subdirectory in the directory that this program resides to receive the picture files.
"""
dwldimages.py
Downloads all the images on the supplied URL, and saves them to the
specified output file ("/test/" by default)
Usage:
python dwldimages.py http://example.com/ /tmp/test/ # where /tmp/test/ is the directory you want to save the image files to.
# Mostly taken from https://stackoverflow.com/questions/257409/download-image-file-from-the-html-page-source-using-python
"""
from bs4 import BeautifulSoup as bs
import urlparse
from urllib2 import urlopen
from urllib import urlretrieve
import os
import sys
def main(url, out_folder="/test/"):
"""Downloads all the images at 'url' to /test/"""
soup = bs(urlopen(url))
parsed = list(urlparse.urlparse(url))
for image in soup.findAll("img"):
print "Image: %(src)s" % image
filename = image["src"].split("/")[-1]
parsed[2] = image["src"]
outpath = os.path.join(out_folder, filename)
if image["src"].lower().startswith("http"):
urlretrieve(image["src"], outpath)
else:
urlretrieve(urlparse.urlunparse(parsed), outpath)
def _usage():
print "usage: python dwldimages.py http://example.com [outpath]"
if __name__ == "__main__":
url = sys.argv[-1]
out_folder = "/test/"
if not url.lower().startswith("http"):
out_folder = sys.argv[-1]
url = sys.argv[-2]
if not url.lower().startswith("http"):
_usage()
sys.exit(-1)
main(url, out_folder)