diff --git a/notebooks/Block_5/Seleniumtest/Download_Images.ipynb b/notebooks/Block_5/Seleniumtest/Download_Images.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e1208d19aea89f3d56bbd5e41b38a7115da483ba --- /dev/null +++ b/notebooks/Block_5/Seleniumtest/Download_Images.ipynb @@ -0,0 +1,75 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'Image'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-2-84df93c71169>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mselenium\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mselenium\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwebdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfirefox\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mOptions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mImage_crawling\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mImage_crawling\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# Set options\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/work/hslu-deep-learning/notebooks/Block_5/Seleniumtest/Image_crawling.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mimport\u001b[0m 
\u001b[0mos\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mio\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mImage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'Image'" + ] + } + ], + "source": [ + "from selenium import webdriver\n", + "from selenium.webdriver.firefox.options import Options\n", + "from Image_crawling import Image_crawling\n", + "\n", + "# Set options\n", + "options = webdriver.FirefoxOptions() \n", + "options = webdriver.FirefoxOptions()\n", + "options.add_argument('--headless')\n", + "\n", + "# Create Driver\n", + "driver = webdriver.Firefox(options=options, executable_path=\"/usr/bin/geckodriver\")\n", + "\n", + "# create instance of crawler\n", + "image_crawling = Image_crawling(driver)\n", + "\n", + "# Craws image urls:\n", + "image_urls = image_crawling.fetch_image_urls(\"sailing\", 10)\n", + "print(image_urls)\n", + "\n", + "# download images\n", + "image_crawling.download_image(\"./images\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/Block_5/Seleniumtest/Image_crawling.py 
""" Class containing all functions needed to download images from Google
Following example set by: https://towardsdatascience.com/image-scraping-with-python-a96feda8af2d
Adapted by: Simon van Hemert
Date edited:2021.01.05 """

import hashlib
import io
import os
import time


class Image_crawling:
    """Collect image URLs from a Google image search and save the images to disk.

    ``drive`` is the browser driver object supplied by the caller. This class only
    calls ``get``, ``execute_script`` and ``find_elements_by_css_selector`` on it.
    """

    def __init__(self, drive):
        # Pause (seconds) after every scroll and after every thumbnail click.
        self.sleep_between_interactions = 0.1
        # Pause (seconds) before asking the page to load more results.
        self.sleep_before_load_more = 5
        # URLs collected by the last fetch_image_urls() call. Starts empty so that
        # download_image() before any fetch is a no-op instead of an AttributeError.
        self.image_urls = set()

        self.drive = drive

    def fetch_image_urls(self, query:str, max_links_to_fetch:int):
        """Open a Google image search for *query* and collect image URLs.

        Args:
            query: Search term, inserted into the search URL exactly as given.
            max_links_to_fetch: Stop as soon as this many distinct URLs are collected.

        Returns:
            The set of collected URLs, which is also stored in ``self.image_urls``.
            It can hold fewer than ``max_links_to_fetch`` entries when the page
            stops showing new thumbnails.
        """
        # build the google query
        search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

        # load the page
        self.drive.get(search_url.format(q=query))

        image_urls = set()
        results_start = 0
        while len(image_urls) < max_links_to_fetch:
            self.scroll_to_end()

            # get all image thumbnail results
            thumbnail_results = self.drive.find_elements_by_css_selector("img.Q4LuWd")
            number_results = len(thumbnail_results)

            if number_results <= results_start:
                # No new thumbnails since the previous round: give up instead of
                # scrolling forever, and hand back what was collected so far.
                print(f"Found: {len(image_urls)} image links, no more results available.")
                break

            print(f"Found: {number_results} search results. Extracting links from {results_start}:{number_results}")

            for img in thumbnail_results[results_start:number_results]:
                # try to click every thumbnail such that we can get the real image behind it
                try:
                    img.click()
                    time.sleep(self.sleep_between_interactions)
                except Exception as e:
                    print("Exception", e, "occured in clicking on thumbnails ")
                    continue

                # extract image urls
                for actual_image in self.drive.find_elements_by_css_selector('img.n3VNCb'):
                    src = actual_image.get_attribute('src')
                    if src and 'http' in src:
                        image_urls.add(src)

                if len(image_urls) >= max_links_to_fetch:
                    print(f"Found: {len(image_urls)} image links, done!")
                    break
            else:
                # Every new thumbnail was visited without reaching the target.
                print("Found:", len(image_urls), "image links, looking for more ...")
                time.sleep(self.sleep_before_load_more)
                # The plural lookup is used so that a missing "load more" button
                # yields an empty result instead of an exception.
                if self.drive.find_elements_by_css_selector(".mye4qd"):
                    self.drive.execute_script("document.querySelector('.mye4qd').click();")

            # move the result startpoint further down
            results_start = number_results

        self.image_urls = image_urls

        return image_urls

    def scroll_to_end(self):
        """Scroll the page to the bottom, then pause for ``sleep_between_interactions``."""
        self.drive.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(self.sleep_between_interactions)

    def download_image(self, folder_path:str):
        """Download every URL in ``self.image_urls`` and save it as a JPEG in *folder_path*.

        Each file is named after the first 10 hex digits of the SHA-1 of the
        downloaded bytes. A URL that cannot be downloaded or saved is reported on
        stdout and skipped.

        Args:
            folder_path: Target directory; created when it does not exist yet.
        """
        # Imported here so that collecting URLs still works when these third-party
        # packages are not installed; only downloading needs them.
        import requests
        from PIL import Image

        # open(..., 'wb') below does not create missing directories.
        os.makedirs(folder_path, exist_ok=True)

        for url in self.image_urls:
            try:
                response = requests.get(url, timeout=30)
                response.raise_for_status()
                image_content = response.content
            except Exception as e:
                print(f"ERROR - Could not download {url} - {e}")
                # There are no bytes for this URL; never fall through with an
                # undefined value or with the previous URL's content.
                continue

            try:
                image_file = io.BytesIO(image_content)
                image = Image.open(image_file).convert('RGB')
                file_path = os.path.join(folder_path, hashlib.sha1(image_content).hexdigest()[:10] + '.jpg')
                with open(file_path, 'wb') as f:
                    image.save(f, "JPEG", quality=85)
                print(f"SUCCESS - saved {url} - as {file_path}")
            except Exception as e:
                print(f"ERROR - Could not save {url} - {e}")


# def
search_and_download(search_term:str,driver_path:str,target_path='./images',number_images=5): +# target_folder = os.path.join(target_path,'_'.join(search_term.lower().split(' '))) + +# if not os.path.exists(target_folder): +# os.makedirs(target_folder) + +# with webdriver.Chrome(executable_path=driver_path) as wd: +# res = fetch_image_urls(search_term, number_images, wd=wd, sleep_between_interactions=0.5) + +# for elem in res: +# persist_image(target_folder,elem) diff --git a/notebooks/Block_5/Seleniumtest/Selenium_test.ipynb b/notebooks/Block_5/Seleniumtest/Selenium_test.ipynb index dc890fc5f101726c690117bd34eacf8ea91e8db9..6a1f410c3c9f9f0a575336593265977de709614c 100644 --- a/notebooks/Block_5/Seleniumtest/Selenium_test.ipynb +++ b/notebooks/Block_5/Seleniumtest/Selenium_test.ipynb @@ -1,50 +1,5 @@ { "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### GoogleImageExtractor\n", - "using geckodrive" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "ename": "WebDriverException", - "evalue": "Message: Process unexpectedly closed with status 1\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mWebDriverException\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-11-c110f91c975f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_num_image_to_dl\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mw\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmulti_search_download\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - 
"\u001b[0;32m/work/hslu-deep-learning/notebooks/Block_5/Seleniumtest/GoogleImageExtractor.py\u001b[0m in \u001b[0;36mmulti_search_download\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformed_search_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 139\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve_source_fr_html\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 140\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextract_pic_url\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 141\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdownloading_all_photos\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#some download might not be jpg?? 
use selnium to download??\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/work/hslu-deep-learning/notebooks/Block_5/Seleniumtest/GoogleImageExtractor.py\u001b[0m in \u001b[0;36mretrieve_source_fr_html\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \"\"\"\n\u001b[0;32m---> 93\u001b[0;31m \u001b[0mdriver\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFirefox\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 94\u001b[0m \u001b[0mdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtarget_url_str\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/firefox/webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, firefox_profile, firefox_binary, timeout, capabilities, proxy, executable_path, options, service_log_path, firefox_options, service_args, desired_capabilities, log_path, keep_alive)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0mcommand_executor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mexecutor\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 173\u001b[0m \u001b[0mdesired_capabilities\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcapabilities\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 174\u001b[0;31m keep_alive=True)\n\u001b[0m\u001b[1;32m 175\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0;31m# Selenium remote\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - 
"\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, command_executor, desired_capabilities, browser_profile, proxy, keep_alive, file_detector, options)\u001b[0m\n\u001b[1;32m 155\u001b[0m warnings.warn(\"Please use FirefoxOptions to set browser profile\",\n\u001b[1;32m 156\u001b[0m DeprecationWarning, stacklevel=2)\n\u001b[0;32m--> 157\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart_session\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcapabilities\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbrowser_profile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 158\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_switch_to\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSwitchTo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mobile\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMobile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mstart_session\u001b[0;34m(self, capabilities, browser_profile)\u001b[0m\n\u001b[1;32m 250\u001b[0m parameters = {\"capabilities\": w3c_caps,\n\u001b[1;32m 251\u001b[0m \"desiredCapabilities\": capabilities}\n\u001b[0;32m--> 252\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNEW_SESSION\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 253\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0;34m'sessionId'\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'value'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, driver_command, params)\u001b[0m\n\u001b[1;32m 319\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 321\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 322\u001b[0m response['value'] = self._unwrap_value(\n\u001b[1;32m 323\u001b[0m response.get('value', None))\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[0malert_text\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mvalue\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'alert'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'text'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 242\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 243\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mWebDriverException\u001b[0m: Message: Process unexpectedly closed with status 1\n" - ] - } - ], - "source": [ - "from GoogleImageExtractor import GoogleImageExtractor\n", - "\n", - "\"\"\"test the downloading of files\"\"\"\n", - "# queries = \"brad pitt, johnny depp, leonardo dicaprio, robert de niro, angelina jolie, sandra bullock, catherine deneuve, marion cotillard\"\n", - "queries = \"auto, zeilschip\"\n", - "\n", - "w = GoogleImageExtractor(queries) #leave blanks if get the search list from file\n", - "\n", - "w.set_num_image_to_dl(2)\n", - "w.multi_search_download()" - ] - }, { "cell_type": "markdown", 
"metadata": {}, @@ -136,34 +91,111 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 6, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:8: DeprecationWarning: use options instead of chrome_options\n", - " \n" - ] - }, - { - "ename": "WebDriverException", - "evalue": "Message: 'chromedriver' executable needs to be in PATH. Please see https://sites.google.com/a/chromium.org/chromedriver/home\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/common/service.py\u001b[0m in \u001b[0;36mstart\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0mstderr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog_file\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 76\u001b[0;31m stdin=PIPE)\n\u001b[0m\u001b[1;32m 77\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/subprocess.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)\u001b[0m\n\u001b[1;32m 799\u001b[0m \u001b[0merrread\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrwrite\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 800\u001b[0;31m restore_signals, start_new_session)\n\u001b[0m\u001b[1;32m 801\u001b[0m 
\u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/subprocess.py\u001b[0m in \u001b[0;36m_execute_child\u001b[0;34m(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)\u001b[0m\n\u001b[1;32m 1550\u001b[0m \u001b[0merr_msg\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m': '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mrepr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr_filename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1551\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mchild_exception_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merrno_num\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merr_msg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merr_filename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1552\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mchild_exception_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr_msg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'chromedriver': 'chromedriver'", - "\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[0;31mWebDriverException\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-18-42e4e5598c45>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0moptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd_argument\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"--test-type\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0moptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbinary_location\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34m\"/home/work/hslu-deep-learning/notebooks/Block 5/Selenium test/chromedriver\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mdriver\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mChrome\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchrome_options\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'https://imgur.com/'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/chrome/webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, executable_path, port, options, service_args, desired_capabilities, service_log_path, chrome_options, keep_alive)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0mservice_args\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mservice_args\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m log_path=service_log_path)\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mservice\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/common/service.py\u001b[0m in \u001b[0;36mstart\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 81\u001b[0m raise WebDriverException(\n\u001b[1;32m 82\u001b[0m \"'%s' executable needs to be in PATH. 
%s\" % (\n\u001b[0;32m---> 83\u001b[0;31m os.path.basename(self.path), self.start_error_message)\n\u001b[0m\u001b[1;32m 84\u001b[0m )\n\u001b[1;32m 85\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrno\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0merrno\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mEACCES\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mWebDriverException\u001b[0m: Message: 'chromedriver' executable needs to be in PATH. Please see https://sites.google.com/a/chromium.org/chromedriver/home\n" + "https://pixel.quantserve.com/pixel/p-f8oruOqDFlMeI.gif\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-new-post.13ab64f9f36ad8f25ae3544b350e2ae1.svg\n", + "https://s.imgur.com/images/favicon-32x32.png\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-search.8d0f9b564a4659d48d8eca38b968a7f2.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-emerald-hero.01efdc1ccdadae307801b620cfa17756.png\n", + "https://s.imgur.com/desktop-assets/desktop-assets/avatar-peanut-butter.9e588caafea04959912c1e048ba87d7f.png\n", + "https://s.imgur.com/desktop-assets/desktop-assets/avatar-toaster.b497f4c2ec340d8ec112674228cf1e78.png\n", + "https://s.imgur.com/desktop-assets/desktop-assets/avatar-alien.1c7563678882fbd38fa859a0136cac7d.png\n", + "https://s.imgur.com/images/favicon-32x32.png\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-pause.68f07ce1a7e07bac06d1f2c527d7a9e5.svg\n", + "https://i.imgur.com/YPfpL7S_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/dC6K5SR_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + 
"https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/Pe9nYNF_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/OuGFt1z_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/Clmc0LC_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/8DFxOOv_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/i41PPf0_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://imgur.com/[object%20Object]\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + 
"https://i.imgur.com/FHOSnBD_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/U8r1Jh4_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://imgur.com/[object%20Object]\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://imgur.com/[object%20Object]\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://imgur.com/[object%20Object]\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/PeWnuOC_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/R9rehQX_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + 
"https://i.imgur.com/50n98gu_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/xuTnHTc_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://imgur.com/[object%20Object]\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/EmkImEw_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/bEJM71d_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/ed17rXR_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/RNZpTJm_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + 
"https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://imgur.com/[object%20Object]\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/cHEkDQY_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/JbqeNVT_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://s.imgur.com/images/accolades/gem.png\n", + "https://i.imgur.com/XlhvEPH_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/Tcgf1MX_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://i.imgur.com/5mLrcgO_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/baby-yoda.37513d23dae2853e3270ffbf0f262563.svg\n", + 
"https://s.imgur.com/desktop-assets/desktop-assets/icon-close-outline.3f046db44f7214dba26a6218e3250e44.svg\n", + "https://s.imgur.com/desktop-assets/desktop-assets/icon-points.5867bfb88971853dcfdf49b18f8455b9.svg\n" ] } ], @@ -171,11 +203,12 @@ "from selenium import webdriver\n", "\n", "\n", - "options = webdriver.ChromeOptions()\n", + "options = webdriver.FirefoxOptions()\n", "options.add_argument('--ignore-certificate-errors')\n", "options.add_argument(\"--test-type\")\n", - "options.binary_location = \"/home/work/hslu-deep-learning/notebooks/Block_5/Seleniumtest/chromedriver\"\n", - "driver = webdriver.Chrome(chrome_options=options)\n", + "options.add_argument('--headless')\n", + "options.executable_path = \"/usr/bin/geckodriver\"\n", + "driver = webdriver.Firefox(options=options)\n", "\n", "driver.get('https://imgur.com/')\n", "\n", @@ -185,6 +218,66 @@ "\n", "driver.close()\n" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Following Medium :\n", + "https://medium.com/@igorzabukovec/automate-web-crawling-with-selenium-python-part-1-85113660de96" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from selenium import webdriver\n", + "from selenium.webdriver.firefox.options import Options\n", + "\n", + "options = webdriver.FirefoxOptions()\n", + "options.add_argument('--headless')\n", + "\n", + "driver = webdriver.Firefox(options=options, executable_path=\"/usr/bin/geckodriver\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Following towards data science:\n", + "https://towardsdatascience.com/image-scraping-with-python-a96feda8af2d" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "WebDriverException", + "evalue": "Message: Process unexpectedly closed with status 1\n", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mWebDriverException\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-4-a8e68fd1e84e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mDRIVER_PATH\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"/usr/bin/geckodriver\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mwd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFirefox\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexecutable_path\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDRIVER_PATH\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/firefox/webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, firefox_profile, firefox_binary, timeout, capabilities, proxy, executable_path, options, service_log_path, firefox_options, service_args, desired_capabilities, log_path, keep_alive)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0mcommand_executor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mexecutor\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 173\u001b[0m \u001b[0mdesired_capabilities\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcapabilities\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 174\u001b[0;31m keep_alive=True)\n\u001b[0m\u001b[1;32m 175\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0;31m# Selenium remote\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, command_executor, 
desired_capabilities, browser_profile, proxy, keep_alive, file_detector, options)\u001b[0m\n\u001b[1;32m 155\u001b[0m warnings.warn(\"Please use FirefoxOptions to set browser profile\",\n\u001b[1;32m 156\u001b[0m DeprecationWarning, stacklevel=2)\n\u001b[0;32m--> 157\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart_session\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcapabilities\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbrowser_profile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 158\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_switch_to\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSwitchTo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mobile\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMobile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mstart_session\u001b[0;34m(self, capabilities, browser_profile)\u001b[0m\n\u001b[1;32m 250\u001b[0m parameters = {\"capabilities\": w3c_caps,\n\u001b[1;32m 251\u001b[0m \"desiredCapabilities\": capabilities}\n\u001b[0;32m--> 252\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNEW_SESSION\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 253\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'sessionId'\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'value'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, driver_command, params)\u001b[0m\n\u001b[1;32m 319\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 321\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 322\u001b[0m response['value'] = self._unwrap_value(\n\u001b[1;32m 323\u001b[0m response.get('value', None))\n", + "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[0malert_text\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'alert'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'text'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 241\u001b[0m 
\u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 242\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 243\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mWebDriverException\u001b[0m: Message: Process unexpectedly closed with status 1\n" + ] + } + ], + "source": [ + "from selenium import webdriver\n", + "\n", + "DRIVER_PATH = \"/usr/bin/geckodriver\"\n", + "wd = webdriver.Firefox(executable_path=DRIVER_PATH)" + ] } ], "metadata": { diff --git a/requirements.txt b/requirements.txt index adac4c44eca1fe50852ce39c17322ff04f7b0d11..995e0edffea091e3e466560b2eafefaa13a50753 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ vega_datasets==0.8.0 mrcnn==0.2 altair==4.1.0 selenium==3.141.0 +pillow==8.0.0