Given a file containing a list of songs, one per line, in the format “Artist – Song Title”, download the audio of the first YouTube video link returned by a Google search for that song. This is quite useful if you want the MP3 for every song you ever gave a thumbs up on Pandora. On my computer, this averages about 4 songs a minute.

The Requests API and BeautifulSoup make writing screenscrapers and automating the web really clean and easy.

#!/usr/bin/python
# Takes a list of titles of songs, in the format "artist - song" and searches for each
# song on google. The first youtube link is passed off to youtube-dl to download it and
# get the MP3 out. This doesn't have any throttling because (in theory) the conversion step
# takes enough time to provide throttling.
import requests
import re
# NOTE(review): this is the BeautifulSoup 3 import path; bs4 installs as
# `from bs4 import BeautifulSoup`. Left unchanged here.
from BeautifulSoup import BeautifulSoup
from subprocess import call


def queryConverter(videoURL):
    """Hand videoURL to youtube-dl, asking it to extract the audio as MP3.

    The exit status of youtube-dl is not inspected; a failed download is
    simply skipped and the caller moves on to the next song.
    """
    call(["youtube-dl", "--extract-audio", "--audio-format", "mp3", videoURL])


def queryGoogle(songTitle):
    """Search for songTitle and return the first video URL found.

    Issues a GET request with songTitle as the `q` parameter, collects the
    anchors whose href contains "watch", and returns the first "https..."
    URL found inside one of those hrefs, URL-unquoted.

    Returns None (after printing a diagnostic) when the request fails, the
    response status is not 200, or no usable link is present.
    """
    # NOTE(review): the endpoint literal was empty in the extracted source;
    # the Google search URL is restored from the script's description --
    # confirm against the original.
    reqPreamble = "https://www.google.com/search"
    reqData = {'q': songTitle}

    try:
        # A timeout keeps one stalled request from hanging the whole batch.
        r = requests.get(reqPreamble, params=reqData, timeout=30)
    except requests.exceptions.RequestException:
        print("Failed to issue request to {0}".format(reqPreamble))
        return None

    if r.status_code != 200:
        print("Failed to issue request to {0}".format(r.url))
        return None

    bs = BeautifulSoup(r.text)
    tubelinks = bs.findAll("a", attrs={'href': re.compile("watch")})
    for link in tubelinks:
        # Presumably result hrefs wrap the target as one query parameter
        # among several, so the URL runs from "https" up to the next "&"
        # and is percent-encoded -- hence the unquote.
        match = re.search("https[^&]*", link['href'])
        if match is not None:
            return requests.utils.unquote(match.group(0))

    print("No video for {0}".format(songTitle))
    return None


if __name__ == "__main__":
    with open("./all_pandora_likes", 'r') as inFile:
        for line in inFile:
            # Drop the trailing newline and skip blank lines so they are
            # neither sent as part of the query nor searched on their own.
            songTitle = line.strip()
            if not songTitle:
                continue
            videoURL = queryGoogle(songTitle)
            if videoURL is not None:
                queryConverter(videoURL)