Skip to content
Snippets Groups Projects
Commit b00b3676 authored by Simon van Hemert's avatar Simon van Hemert
Browse files

Added Image crawling

parent 8ea90ff4
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id: tags:
``` python
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from Image_crawling import Image_crawling
# Configure Firefox to run headless (no visible browser window).
# NOTE(review): the FirefoxOptions assignment below appears twice; the second one is redundant.
options = webdriver.FirefoxOptions()
options = webdriver.FirefoxOptions()
options.add_argument('--headless')
# Create the driver, pointing Selenium at the geckodriver executable.
driver = webdriver.Firefox(options=options, executable_path="/usr/bin/geckodriver")
# Create an instance of the crawler that uses this driver.
image_crawling = Image_crawling(driver)
# Crawl image URLs: search for "sailing" and ask for 10 links.
image_urls = image_crawling.fetch_image_urls("sailing", 10)
print(image_urls)
# Download the crawled images into ./images.
image_crawling.download_image("./images")
```
%% Output
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
<ipython-input-2-84df93c71169> in <module>
1 from selenium import webdriver
2 from selenium.webdriver.firefox.options import Options
----> 3 from Image_crawling import Image_crawling
4
5 # Set options
/work/hslu-deep-learning/notebooks/Block_5/Seleniumtest/Image_crawling.py in <module>
7 import os
8 import io
----> 9 import Image
10 import requests
11
ModuleNotFoundError: No module named 'Image'
%% Cell type:code id: tags:
``` python
```
""" Class containing all functions needed to download images from Google
Following example set by: https://towardsdatascience.com/image-scraping-with-python-a96feda8af2d
Adapted by: Simon van Hemert
Date edited:2021.01.05 """
import hashlib
import io
import os
import time
from urllib.parse import quote_plus

import requests
from PIL import Image
class Image_crawling:
    """Collect image URLs from a Google image search and save them as JPEGs.

    The crawler drives an already-created Selenium webdriver: it opens the
    Google image search page for a query, clicks the result thumbnails to
    reveal the full-size image, and remembers every ``http`` image source it
    sees. The remembered URLs can then be downloaded with ``download_image``.

    Attributes:
        drive: Selenium webdriver used for all browser interaction.
        sleep_between_interactions: Seconds to pause after each scroll/click.
        sleep_before_load_more: Seconds to pause before requesting more results.
        download_timeout: Seconds to wait for a single image download.
        image_urls: URLs found by the most recent ``fetch_image_urls`` call
            (an empty set until that method has been called).
    """

    def __init__(self, drive):
        """Store the webdriver and set the default timing parameters."""
        self.sleep_between_interactions = 0.1
        self.sleep_before_load_more = 5
        self.download_timeout = 30
        self.drive = drive
        # Defined up front so download_image() is safe to call before a fetch.
        self.image_urls = set()

    def fetch_image_urls(self, query: str, max_links_to_fetch: int):
        """Search Google Images for *query* and collect image URLs.

        Args:
            query: Search term; it is URL-encoded before being sent.
            max_links_to_fetch: Stop as soon as this many distinct URLs
                have been collected.

        Returns:
            The set of collected image URLs. It may hold fewer entries than
            requested when the page stops yielding new thumbnails. The same
            set is stored on ``self.image_urls``.
        """
        # build the google query
        search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"
        # load the page (quote_plus keeps spaces, '&', '#', ... from breaking the URL)
        self.drive.get(search_url.format(q=quote_plus(query)))

        image_urls = set()
        results_start = 0
        while len(image_urls) < max_links_to_fetch:
            self.scroll_to_end()

            # get all image thumbnail results
            thumbnail_results = self.drive.find_elements_by_css_selector("img.Q4LuWd")
            number_results = len(thumbnail_results)
            print(f"Found: {number_results} search results. Extracting links from {results_start}:{number_results}")

            if number_results <= results_start:
                # Nothing new appeared since the last pass; stop instead of
                # looping forever on an exhausted (or changed) result page.
                print("No new search results, stopping with", len(image_urls), "image links.")
                break

            for img in thumbnail_results[results_start:number_results]:
                # try to click every thumbnail such that we can get the real image behind it
                try:
                    img.click()
                    time.sleep(self.sleep_between_interactions)
                except Exception as e:
                    # Best effort: a thumbnail that cannot be clicked is skipped.
                    print("Exception", e, "occured in clicking on thumbnails ")
                    continue

                # extract image urls
                for actual_image in self.drive.find_elements_by_css_selector('img.n3VNCb'):
                    src = actual_image.get_attribute('src')
                    if src and 'http' in src:
                        image_urls.add(src)

                if len(image_urls) >= max_links_to_fetch:
                    print(f"Found: {len(image_urls)} image links, done!")
                    break
            else:
                # Every new thumbnail was visited without reaching the target.
                print("Found:", len(image_urls), "image links, looking for more ...")
                time.sleep(self.sleep_before_load_more)
                self._click_load_more()

            # move the result startpoint further down
            results_start = number_results

        self.image_urls = image_urls
        return image_urls

    def _click_load_more(self):
        """Click the "load more results" button if the page currently has one."""
        # The plural lookup returns an empty list instead of raising when the
        # button is absent.
        if self.drive.find_elements_by_css_selector(".mye4qd"):
            self.drive.execute_script("document.querySelector('.mye4qd').click();")

    def scroll_to_end(self):
        """Scroll the browser window to the bottom of the page, then pause briefly."""
        self.drive.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(self.sleep_between_interactions)

    def download_image(self, folder_path: str):
        """Download every URL in ``self.image_urls`` into *folder_path* as JPEG.

        Each image is converted to RGB and written as
        ``<first 10 hex chars of the content's SHA-1>.jpg``. Failures are
        reported with a printed message and the remaining URLs are still
        processed.

        Args:
            folder_path: Target directory; it is created when missing.
        """
        os.makedirs(folder_path, exist_ok=True)

        for url in self.image_urls:
            try:
                response = requests.get(url, timeout=self.download_timeout)
                response.raise_for_status()
                image_content = response.content
            except Exception as e:
                # Best effort per URL: report and move on to the next one,
                # so stale content from a previous URL is never saved.
                print(f"ERROR - Could not download {url} - {e}")
                continue

            try:
                image = Image.open(io.BytesIO(image_content)).convert('RGB')
                file_name = hashlib.sha1(image_content).hexdigest()[:10] + '.jpg'
                file_path = os.path.join(folder_path, file_name)
                with open(file_path, 'wb') as f:
                    image.save(f, "JPEG", quality=85)
                print(f"SUCCESS - saved {url} - as {file_path}")
            except Exception as e:
                # Best effort per URL: an undecodable or unwritable image
                # must not abort the remaining downloads.
                print(f"ERROR - Could not save {url} - {e}")
# def search_and_download(search_term:str,driver_path:str,target_path='./images',number_images=5):
# target_folder = os.path.join(target_path,'_'.join(search_term.lower().split(' ')))
# if not os.path.exists(target_folder):
# os.makedirs(target_folder)
# with webdriver.Chrome(executable_path=driver_path) as wd:
# res = fetch_image_urls(search_term, number_images, wd=wd, sleep_between_interactions=0.5)
# for elem in res:
# persist_image(target_folder,elem)
%% Cell type:markdown id: tags:
### GoogleImageExtractor
using geckodriver
%% Cell type:code id: tags:
``` python
from GoogleImageExtractor import GoogleImageExtractor
"""test the downloading of files"""
# Alternative query list (actor names), kept for reference:
# queries = "brad pitt, johnny depp, leonardo dicaprio, robert de niro, angelina jolie, sandra bullock, catherine deneuve, marion cotillard"
# Search terms in one string ("auto" and "zeilschip" are Dutch for "car" and "sailing ship");
# presumably split on commas by GoogleImageExtractor - TODO confirm.
queries = "auto, zeilschip"
w = GoogleImageExtractor(queries)  # per the original note: leave blank to read the search list from a file
# presumably limits the download to 2 images per search term - TODO confirm
w.set_num_image_to_dl(2)
# Runs the search and download; this is the call that failed with the WebDriverException recorded in the output.
w.multi_search_download()
```
%% Output
---------------------------------------------------------------------------
WebDriverException Traceback (most recent call last)
<ipython-input-11-c110f91c975f> in <module>
8
9 w.set_num_image_to_dl(2)
---> 10 w.multi_search_download()
/work/hslu-deep-learning/notebooks/Block_5/Seleniumtest/GoogleImageExtractor.py in multi_search_download(self)
137
138 self.formed_search_url()
--> 139 self.retrieve_source_fr_html()
140 self.extract_pic_url()
141 self.downloading_all_photos() #some download might not be jpg?? use selnium to download??
/work/hslu-deep-learning/notebooks/Block_5/Seleniumtest/GoogleImageExtractor.py in retrieve_source_fr_html(self)
91
92 """
---> 93 driver = webdriver.Firefox()
94 driver.get(self.target_url_str)
95
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/firefox/webdriver.py in __init__(self, firefox_profile, firefox_binary, timeout, capabilities, proxy, executable_path, options, service_log_path, firefox_options, service_args, desired_capabilities, log_path, keep_alive)
172 command_executor=executor,
173 desired_capabilities=capabilities,
--> 174 keep_alive=True)
175
176 # Selenium remote
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in __init__(self, command_executor, desired_capabilities, browser_profile, proxy, keep_alive, file_detector, options)
155 warnings.warn("Please use FirefoxOptions to set browser profile",
156 DeprecationWarning, stacklevel=2)
--> 157 self.start_session(capabilities, browser_profile)
158 self._switch_to = SwitchTo(self)
159 self._mobile = Mobile(self)
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in start_session(self, capabilities, browser_profile)
250 parameters = {"capabilities": w3c_caps,
251 "desiredCapabilities": capabilities}
--> 252 response = self.execute(Command.NEW_SESSION, parameters)
253 if 'sessionId' not in response:
254 response = response['value']
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
319 response = self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
WebDriverException: Message: Process unexpectedly closed with status 1
%% Cell type:markdown id: tags:
### Selenium using Firefox webdriver
Somehow this also refers to / looks for geckodriver.
%% Cell type:code id: tags:
``` python
# Problem with getting this webdriver in current path or installed at all for that matter, could try to just copy the file into gitlab
# https://medium.com/cs-note/web-crawling-by-using-selenium-python-3-4fff0bdb4c65
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re
# driver = webdriver.Geckodriver()
driver = webdriver.Firefox(executable_path='/usr/bin/geckodriver')
```
%% Output
---------------------------------------------------------------------------
WebDriverException Traceback (most recent call last)
<ipython-input-8-3f533b936151> in <module>
10
11 # driver = webdriver.Geckodriver()
---> 12 driver = webdriver.Firefox(executable_path='/usr/bin/geckodriver')
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/firefox/webdriver.py in __init__(self, firefox_profile, firefox_binary, timeout, capabilities, proxy, executable_path, options, service_log_path, firefox_options, service_args, desired_capabilities, log_path, keep_alive)
172 command_executor=executor,
173 desired_capabilities=capabilities,
--> 174 keep_alive=True)
175
176 # Selenium remote
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in __init__(self, command_executor, desired_capabilities, browser_profile, proxy, keep_alive, file_detector, options)
155 warnings.warn("Please use FirefoxOptions to set browser profile",
156 DeprecationWarning, stacklevel=2)
--> 157 self.start_session(capabilities, browser_profile)
158 self._switch_to = SwitchTo(self)
159 self._mobile = Mobile(self)
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in start_session(self, capabilities, browser_profile)
250 parameters = {"capabilities": w3c_caps,
251 "desiredCapabilities": capabilities}
--> 252 response = self.execute(Command.NEW_SESSION, parameters)
253 if 'sessionId' not in response:
254 response = response['value']
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
319 response = self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
WebDriverException: Message: Process unexpectedly closed with status 1
%% Cell type:markdown id: tags:
### Selenium defining Driver path
Defines the driver path explicitly, which I assume means it could be either driver.
%% Cell type:code id: tags:
``` python
# Same problem, driver needs to be in the path somehow.
# https://medium.com/swlh/web-scraping-stock-images-using-google-selenium-and-python-8b825ba649b9
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
# DRIVER_PATH = '/home/work/hslu-deep-learning/notebooks/Block_5/Seleniumtest/geckodriver'
DRIVER_PATH = '/usr/bin/geckodriver'
service = Service(DRIVER_PATH)
service.start()
wd = webdriver.Remote(service.service_url)
wd.quit()
```
%% Output
---------------------------------------------------------------------------
WebDriverException Traceback (most recent call last)
<ipython-input-6-29c58177a12a> in <module>
9 service = Service(DRIVER_PATH)
10 service.start()
---> 11 wd = webdriver.Remote(service.service_url)
12 wd.quit()
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in __init__(self, command_executor, desired_capabilities, browser_profile, proxy, keep_alive, file_detector, options)
155 warnings.warn("Please use FirefoxOptions to set browser profile",
156 DeprecationWarning, stacklevel=2)
--> 157 self.start_session(capabilities, browser_profile)
158 self._switch_to = SwitchTo(self)
159 self._mobile = Mobile(self)
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in start_session(self, capabilities, browser_profile)
250 parameters = {"capabilities": w3c_caps,
251 "desiredCapabilities": capabilities}
--> 252 response = self.execute(Command.NEW_SESSION, parameters)
253 if 'sessionId' not in response:
254 response = response['value']
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
319 response = self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
WebDriverException: Message: Process unexpectedly closed with status 1
%% Cell type:code id: tags:
``` python
from selenium import webdriver
options = webdriver.ChromeOptions()
options = webdriver.FirefoxOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument("--test-type")
options.binary_location = "/home/work/hslu-deep-learning/notebooks/Block_5/Seleniumtest/chromedriver"
driver = webdriver.Chrome(chrome_options=options)
options.add_argument('--headless')
options.executable_path = "/usr/bin/geckodriver"
driver = webdriver.Firefox(options=options)
driver.get('https://imgur.com/')
images = driver.find_elements_by_tag_name('img')
for image in images:
print(image.get_attribute('src'))
driver.close()
```
%% Output
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:8: DeprecationWarning: use options instead of chrome_options
https://pixel.quantserve.com/pixel/p-f8oruOqDFlMeI.gif
https://s.imgur.com/desktop-assets/desktop-assets/icon-new-post.13ab64f9f36ad8f25ae3544b350e2ae1.svg
https://s.imgur.com/images/favicon-32x32.png
https://s.imgur.com/desktop-assets/desktop-assets/icon-search.8d0f9b564a4659d48d8eca38b968a7f2.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-emerald-hero.01efdc1ccdadae307801b620cfa17756.png
https://s.imgur.com/desktop-assets/desktop-assets/avatar-peanut-butter.9e588caafea04959912c1e048ba87d7f.png
https://s.imgur.com/desktop-assets/desktop-assets/avatar-toaster.b497f4c2ec340d8ec112674228cf1e78.png
https://s.imgur.com/desktop-assets/desktop-assets/avatar-alien.1c7563678882fbd38fa859a0136cac7d.png
https://s.imgur.com/images/favicon-32x32.png
https://s.imgur.com/desktop-assets/desktop-assets/icon-pause.68f07ce1a7e07bac06d1f2c527d7a9e5.svg
https://i.imgur.com/YPfpL7S_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/dC6K5SR_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/Pe9nYNF_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/OuGFt1z_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/Clmc0LC_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/8DFxOOv_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/i41PPf0_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://imgur.com/[object%20Object]
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/FHOSnBD_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/U8r1Jh4_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://imgur.com/[object%20Object]
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://imgur.com/[object%20Object]
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://imgur.com/[object%20Object]
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/PeWnuOC_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/R9rehQX_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/50n98gu_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/xuTnHTc_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://imgur.com/[object%20Object]
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/EmkImEw_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/bEJM71d_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/ed17rXR_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/RNZpTJm_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://imgur.com/[object%20Object]
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/cHEkDQY_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/JbqeNVT_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://s.imgur.com/images/accolades/gem.png
https://i.imgur.com/XlhvEPH_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/Tcgf1MX_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://i.imgur.com/5mLrcgO_d.webp?maxwidth=520&shape=thumb&fidelity=high
https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg
https://s.imgur.com/desktop-assets/desktop-assets/baby-yoda.37513d23dae2853e3270ffbf0f262563.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-close-outline.3f046db44f7214dba26a6218e3250e44.svg
https://s.imgur.com/desktop-assets/desktop-assets/icon-points.5867bfb88971853dcfdf49b18f8455b9.svg
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/common/service.py in start(self)
75 stderr=self.log_file,
---> 76 stdin=PIPE)
77 except TypeError:
/opt/conda/lib/python3.7/subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)
799 errread, errwrite,
--> 800 restore_signals, start_new_session)
801 except:
/opt/conda/lib/python3.7/subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)
1550 err_msg += ': ' + repr(err_filename)
-> 1551 raise child_exception_type(errno_num, err_msg, err_filename)
1552 raise child_exception_type(err_msg)
FileNotFoundError: [Errno 2] No such file or directory: 'chromedriver': 'chromedriver'
%% Cell type:markdown id: tags:
## Following Medium :
https://medium.com/@igorzabukovec/automate-web-crawling-with-selenium-python-part-1-85113660de96
%% Cell type:code id: tags:
During handling of the above exception, another exception occurred:
``` python
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
options = webdriver.FirefoxOptions()
options.add_argument('--headless')
driver = webdriver.Firefox(options=options, executable_path="/usr/bin/geckodriver")
```
%% Cell type:markdown id: tags:
# Following towards data science:
https://towardsdatascience.com/image-scraping-with-python-a96feda8af2d
%% Cell type:code id: tags:
``` python
from selenium import webdriver
DRIVER_PATH = "/usr/bin/geckodriver"
wd = webdriver.Firefox(executable_path=DRIVER_PATH)
```
%% Output
---------------------------------------------------------------------------
WebDriverException Traceback (most recent call last)
<ipython-input-18-42e4e5598c45> in <module>
6 options.add_argument("--test-type")
7 options.binary_location = "/home/work/hslu-deep-learning/notebooks/Block 5/Selenium test/chromedriver"
----> 8 driver = webdriver.Chrome(chrome_options=options)
9
10 driver.get('https://imgur.com/')
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/chrome/webdriver.py in __init__(self, executable_path, port, options, service_args, desired_capabilities, service_log_path, chrome_options, keep_alive)
71 service_args=service_args,
72 log_path=service_log_path)
---> 73 self.service.start()
74
75 try:
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/common/service.py in start(self)
81 raise WebDriverException(
82 "'%s' executable needs to be in PATH. %s" % (
---> 83 os.path.basename(self.path), self.start_error_message)
84 )
85 elif err.errno == errno.EACCES:
WebDriverException: Message: 'chromedriver' executable needs to be in PATH. Please see https://sites.google.com/a/chromium.org/chromedriver/home
<ipython-input-4-a8e68fd1e84e> in <module>
3
4 DRIVER_PATH = "/usr/bin/geckodriver"
----> 5 wd = webdriver.Firefox(executable_path=DRIVER_PATH)
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/firefox/webdriver.py in __init__(self, firefox_profile, firefox_binary, timeout, capabilities, proxy, executable_path, options, service_log_path, firefox_options, service_args, desired_capabilities, log_path, keep_alive)
172 command_executor=executor,
173 desired_capabilities=capabilities,
--> 174 keep_alive=True)
175
176 # Selenium remote
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in __init__(self, command_executor, desired_capabilities, browser_profile, proxy, keep_alive, file_detector, options)
155 warnings.warn("Please use FirefoxOptions to set browser profile",
156 DeprecationWarning, stacklevel=2)
--> 157 self.start_session(capabilities, browser_profile)
158 self._switch_to = SwitchTo(self)
159 self._mobile = Mobile(self)
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in start_session(self, capabilities, browser_profile)
250 parameters = {"capabilities": w3c_caps,
251 "desiredCapabilities": capabilities}
--> 252 response = self.execute(Command.NEW_SESSION, parameters)
253 if 'sessionId' not in response:
254 response = response['value']
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py in execute(self, driver_command, params)
319 response = self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
WebDriverException: Message: Process unexpectedly closed with status 1
......
......@@ -9,3 +9,4 @@ vega_datasets==0.8.0
mrcnn==0.2
altair==4.1.0
selenium==3.141.0
pillow==8.0.0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment