diff --git a/notebooks/Block_5/Seleniumtest/Image_crawling.py b/notebooks/Block_5/Image_crawling.py similarity index 77% rename from notebooks/Block_5/Seleniumtest/Image_crawling.py rename to notebooks/Block_5/Image_crawling.py index d47c924851ab269883c2f75fff1f7e885519fc11..5947be6e737f0e579e1621a6f47201549cda0de2 100644 --- a/notebooks/Block_5/Seleniumtest/Image_crawling.py +++ b/notebooks/Block_5/Image_crawling.py @@ -10,13 +10,14 @@ from PIL import Image import requests class Image_crawling: - def __init__(self, drive): - self.sleep_between_interactions = 0.1 + def __init__(self, drive, waittime=0.1): + self.sleep_between_interactions = waittime self.drive = drive def fetch_image_urls(self, query:str, max_links_to_fetch:int): - + + self.query = query # build the google query search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img" @@ -33,7 +34,7 @@ class Image_crawling: thumbnail_results = self.drive.find_elements_by_css_selector("img.Q4LuWd") number_results = len(thumbnail_results) - print(f"Found: {number_results} search results. Extracting links from {results_start}:{number_results}") +# print(f"Found: {number_results} search results. Extracting links from {results_start}:{number_results}") for img in thumbnail_results[results_start:number_results]: # try to click every thumbnail such that we can get the real image behind it @@ -53,7 +54,7 @@ class Image_crawling: image_count = len(image_urls) if len(image_urls) >= max_links_to_fetch: - print(f"Found: {len(image_urls)} image links, done!") + print(f"Found {len(image_urls)} image links") break else: print("Found:", len(image_urls), "image links, looking for more ...") @@ -78,7 +79,14 @@ class Image_crawling: def download_image(self, folder_path:str): + imagenr = 0 for url in self.image_urls: + # Create directory for images + try: + os.mkdir(folder_path) + except FileExistsError: + pass + try: image_content = requests.get(url).content @@ -88,22 +96,13 @@ class Image_crawling: try: image_file = io.BytesIO(image_content) image = Image.open(image_file).convert('RGB') - file_path = os.path.join(folder_path,hashlib.sha1(image_content).hexdigest()[:10] + '.jpg') + file_path = os.path.join(folder_path, self.query + str(imagenr) + '.jpg') with open(file_path, 'wb') as f: image.save(f, "JPEG", quality=85) - print(f"SUCCESS - saved {url} - as {file_path}") +# print(f"SUCCESS - saved {url} - as {file_path}") + imagenr += 1 except Exception as e: print(f"ERROR - Could not save {url} - {e}") - - -# def search_and_download(search_term:str,driver_path:str,target_path='./images',number_images=5): -# target_folder = os.path.join(target_path,'_'.join(search_term.lower().split(' '))) - -# if not os.path.exists(target_folder): -# os.makedirs(target_folder) - -# with webdriver.Chrome(executable_path=driver_path) as wd: -# res = fetch_image_urls(search_term, number_images, wd=wd, sleep_between_interactions=0.5) - -# for elem in res: -# persist_image(target_folder,elem) + + print("Saved", imagenr, "images") + \ No newline at end of file diff --git a/notebooks/Block_5/Jupyter Notebook Block 5 - Object Detection and Segmentation.ipynb b/notebooks/Block_5/Jupyter Notebook Block 5 - Object Detection and Segmentation.ipynb index d291f71f0437b3b0c770866f17b50e5b26985d4f..53851c225324b8d980aa6e06963092623be9051e 100644 --- a/notebooks/Block_5/Jupyter Notebook Block 5 - Object Detection and Segmentation.ipynb +++ b/notebooks/Block_5/Jupyter Notebook Block 5 - Object Detection and Segmentation.ipynb @@ -28,181 +28,57 @@ }, "outputs": [ { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-1-83a71479ebf0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 145\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msimple_image_download\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 147\u001b[0;31m \u001b[0mresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdownload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mqueries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlimit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdownload_folder\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m<ipython-input-1-83a71479ebf0>\u001b[0m in \u001b[0;36mdownload\u001b[0;34m(self, keywords, limit, download_folder)\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 101\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobject_raw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallow_redirects\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 102\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mos\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'wb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/requests/api.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(url, params, **kwargs)\u001b[0m\n\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'allow_redirects'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 76\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'get'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparams\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 77\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/requests/api.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 62\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 528\u001b[0m }\n\u001b[1;32m 529\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 530\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 531\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 641\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 642\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 643\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 644\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 645\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[0mdecode_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 448\u001b[0m \u001b[0mretries\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmax_retries\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 449\u001b[0;31m \u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 450\u001b[0m )\n\u001b[1;32m 451\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 675\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 676\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 677\u001b[0;31m \u001b[0mchunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mchunked\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 678\u001b[0m )\n\u001b[1;32m 679\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 424\u001b[0m \u001b[0;31m# Python 3 (including for exceptions like SystemExit).\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 425\u001b[0m \u001b[0;31m# Otherwise it looks like a bug in the code.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 426\u001b[0;31m \u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 427\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mSocketTimeout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBaseSSLError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mSocketError\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 428\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_timeout\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_value\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mread_timeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/urllib3/packages/six.py\u001b[0m in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 419\u001b[0m \u001b[0;31m# Python 3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 421\u001b[0;31m \u001b[0mhttplib_response\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 422\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mBaseException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 423\u001b[0m \u001b[0;31m# Remove the TypeError from the exception chain in\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36mgetresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1342\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1343\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1344\u001b[0;31m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbegin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1345\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mConnectionError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1346\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36mbegin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 304\u001b[0m \u001b[0;31m# read until we get a non-100 response\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 306\u001b[0;31m \u001b[0mversion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstatus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreason\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 307\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mstatus\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mCONTINUE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 308\u001b[0m \u001b[0;32mbreak\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/http/client.py\u001b[0m in \u001b[0;36m_read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_read_status\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_MAXLINE\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"iso-8859-1\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0m_MAXLINE\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mLineTooLong\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"status line\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/socket.py\u001b[0m in \u001b[0;36mreadinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 587\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 588\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 589\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mb\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 590\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mtimeout\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 591\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_timeout_occurred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/urllib3/contrib/pyopenssl.py\u001b[0m in \u001b[0;36mrecv_into\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 311\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 312\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 313\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv_into\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 314\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mOpenSSL\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSSL\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSysCallError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 315\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msuppress_ragged_eofs\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Unexpected EOF\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/OpenSSL/SSL.py\u001b[0m in \u001b[0;36mrecv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1837\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_lib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSSL_peek\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1838\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1839\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_lib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSSL_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbuf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnbytes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1840\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_raise_ssl_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ssl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1841\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 10 image links\n", + "Saved 10 images\n", + "Found 10 image links\n", + "Saved 10 images\n", + "Found 10 image links\n", + "Saved 10 images\n", + "Found 10 image links\n", + "Saved 10 images\n", + "Found 10 image links\n", + "Saved 10 images\n", + "Found 10 image links\n", + "Saved 10 images\n", + "Found 10 image links\n", + "ERROR - Could not save https://s.france24.com/media/display/525459f2-00b7-11ea-82ed-005056a98db9/w:1280/p:16x9/FRANCE-CINEMA-DENEUVE-HOSPITAL-reuters-M.webp - cannot identify image file <_io.BytesIO object at 0x7f693234e290>\n", + "Saved 9 images\n", + "Found 10 image links\n", + "Saved 10 images\n" ] } ], "source": [ + "from selenium import webdriver\n", + "from selenium.webdriver.firefox.options import Options\n", + "from Image_crawling import Image_crawling\n", "import os\n", - "import time\n", - "import urllib\n", - "import requests\n", - "from urllib.parse import quote\n", - "import array as arr\n", - "\n", "\n", "# Specifiy the queries\n", - "queries = \"brad pitt, johnny depp, leonardo dicaprio, robert de niro, angelina jolie, sandra bullock, catherine deneuve, marion cotillard\"\n", + "queries = [\"brad pitt\",\"johnny depp\", \"leonardo dicaprio\", \"robert de niro\", \"angelina jolie\", \"sandra bullock\", \"catherine deneuve\", \"marion cotillard\"]\n", "limit = 10\n", "download_folder = \"./brandnew_images/train/\"\n", + "waittime = 0.1 # Time to wait between actions, depends on the number of pictures you want to crawl. More pictures means you need to wait longer for them to load. \n", + "\n", + "# Set options\n", + "options = webdriver.FirefoxOptions()\n", + "options.add_argument('--headless')\n", "\n", + "# Create Driver\n", + "driver = webdriver.Firefox(options=options, executable_path=\"/usr/bin/geckodriver\")\n", "\n", - "class simple_image_download:\n", - " def __init__(self):\n", - " pass\n", + "# create instance of crawler\n", + "image_crawling = Image_crawling(driver, waittime=waittime)\n", "\n", - " def urls(self, keywords, limit, download_folder):\n", - " keyword_to_search = [str(item).strip() for item in keywords.split(',')]\n", - " i = 0\n", - " links = []\n", - " while i < len(keyword_to_search):\n", - " url = 'https://www.google.com/search?q=' + quote(\n", - " keyword_to_search[i].encode(\n", - " 'utf-8')) + '&biw=1536&bih=674&tbm=isch&sxsrf=ACYBGNSXXpS6YmAKUiLKKBs6xWb4uUY5gA:1581168823770&source=lnms&sa=X&ved=0ahUKEwioj8jwiMLnAhW9AhAIHbXTBMMQ_AUI3QUoAQ'\n", - " raw_html = self._download_page(url)\n", - "\n", - " end_object = -1;\n", - "\n", - " j = 0\n", - " while j < limit:\n", - " while (True):\n", - " try:\n", - " new_line = raw_html.find('\"https://', end_object + 1)\n", - " end_object = raw_html.find('\"', new_line + 1)\n", - "\n", - " buffor = raw_html.find('\\\\', new_line + 1, end_object)\n", - " if buffor != -1:\n", - " object_raw = (raw_html[new_line + 1:buffor])\n", - " else:\n", - " object_raw = (raw_html[new_line + 1:end_object])\n", - "\n", - " if '.jpg' in object_raw or 'png' in object_raw or '.ico' in object_raw or '.gif' in object_raw or '.jpeg' in object_raw:\n", - " break\n", - "\n", - " except Exception as e:\n", - " print(e)\n", - " break\n", - "\n", - " links.append(object_raw)\n", - " j += 1\n", - "\n", - " i += 1\n", - " return(links)\n", - "\n", - "\n", - " def download(self, keywords, limit, download_folder):\n", - " keyword_to_search = [str(item).strip() for item in keywords.split(',')]\n", - " main_directory = download_folder\n", - " i = 0\n", - "\n", - " while i < len(keyword_to_search):\n", - " self._create_directories(main_directory, keyword_to_search[i])\n", - " url = 'https://www.google.com/search?q=' + quote(\n", - " keyword_to_search[i].encode('utf-8')) + '&biw=1536&bih=674&tbm=isch&sxsrf=ACYBGNSXXpS6YmAKUiLKKBs6xWb4uUY5gA:1581168823770&source=lnms&sa=X&ved=0ahUKEwioj8jwiMLnAhW9AhAIHbXTBMMQ_AUI3QUoAQ'\n", - " raw_html = self._download_page(url)\n", - "\n", - " end_object = -1;\n", - "\n", - " j = 0\n", - " while j < limit:\n", - " while (True):\n", - " try:\n", - " new_line = raw_html.find('\"https://', end_object + 1)\n", - " end_object = raw_html.find('\"', new_line + 1)\n", - "\n", - " buffor = raw_html.find('\\\\', new_line + 1, end_object)\n", - " if buffor != -1:\n", - " object_raw = (raw_html[new_line+1:buffor])\n", - " else:\n", - " object_raw = (raw_html[new_line+1:end_object])\n", - "\n", - " if '.jpg' in object_raw or 'png' in object_raw or '.ico' in object_raw or '.gif' in object_raw or '.jpeg' in object_raw:\n", - " break\n", - "\n", - " except Exception as e:\n", - " print(e)\n", - " break\n", - "\n", - " path = main_directory + keyword_to_search[i]\n", - "\n", - " #print(object_raw)\n", - "\n", - " if not os.path.exists(path):\n", - " os.makedirs(path)\n", - "\n", - " filename = str(keyword_to_search[i]) + \"_\" + str(j + 1) + \".jpg\"\n", - "\n", - " try:\n", - " r = requests.get(object_raw, allow_redirects=True)\n", - " open(os.path.join(path, filename), 'wb').write(r.content)\n", - " except Exception as e:\n", - " print(e)\n", - " j -= 1\n", - " j += 1\n", - "\n", - " i += 1\n", - "\n", - "\n", - " def _create_directories(self, main_directory, name):\n", - " try:\n", - " if not os.path.exists(main_directory):\n", - " os.makedirs(main_directory)\n", - " time.sleep(0.2)\n", - " path = (name)\n", - " sub_directory = os.path.join(main_directory, path)\n", - " if not os.path.exists(sub_directory):\n", - " os.makedirs(sub_directory)\n", - " else:\n", - " path = (name)\n", - " sub_directory = os.path.join(main_directory, path)\n", - " if not os.path.exists(sub_directory):\n", - " os.makedirs(sub_directory)\n", - "\n", - " except OSError as e:\n", - " if e.errno != 17:\n", - " raise\n", - " pass\n", - " return\n", - "\n", - " def _download_page(self,url):\n", - "\n", - " try:\n", - " headers = {}\n", - " headers['User-Agent'] = \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36\"\n", - " req = urllib.request.Request(url, headers=headers)\n", - " resp = urllib.request.urlopen(req)\n", - " respData = str(resp.read())\n", - " return respData\n", - "\n", - " except Exception as e:\n", - " print(e)\n", - " exit(0)\n", - " \n", - "response = simple_image_download\n", - "response().download(queries, limit, download_folder)" + "for query in queries:\n", + " # Craws image urls:\n", + " image_urls = image_crawling.fetch_image_urls(query, limit)\n", + " \n", + " # download images\n", + " image_crawling.download_image(download_folder + query)\n" ] }, { diff --git a/notebooks/Block_5/Seleniumtest/Download_Images.ipynb b/notebooks/Block_5/Seleniumtest/Download_Images.ipynb deleted file mode 100644 index e1208d19aea89f3d56bbd5e41b38a7115da483ba..0000000000000000000000000000000000000000 --- a/notebooks/Block_5/Seleniumtest/Download_Images.ipynb +++ /dev/null @@ -1,75 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'Image'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-2-84df93c71169>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mselenium\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mselenium\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwebdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfirefox\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mOptions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mImage_crawling\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mImage_crawling\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# Set options\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/work/hslu-deep-learning/notebooks/Block_5/Seleniumtest/Image_crawling.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mos\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mio\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mImage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrequests\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'Image'" - ] - } - ], - "source": [ - "from selenium import webdriver\n", - "from selenium.webdriver.firefox.options import Options\n", - "from Image_crawling import Image_crawling\n", - "\n", - "# Set options\n", - "options = webdriver.FirefoxOptions() \n", - "options = webdriver.FirefoxOptions()\n", - "options.add_argument('--headless')\n", - "\n", - "# Create Driver\n", - "driver = webdriver.Firefox(options=options, executable_path=\"/usr/bin/geckodriver\")\n", - "\n", - "# create instance of crawler\n", - "image_crawling = Image_crawling(driver)\n", - "\n", - "# Craws image urls:\n", - "image_urls = image_crawling.fetch_image_urls(\"sailing\", 10)\n", - "print(image_urls)\n", - "\n", - "# download images\n", - "image_crawling.download_image(\"./images\")\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/Block_5/Seleniumtest/GoogleImageExtractor.py b/notebooks/Block_5/Seleniumtest/GoogleImageExtractor.py deleted file mode 100644 index 87e94c91b733174f75b110f8d82b22186fa37e18..0000000000000000000000000000000000000000 --- a/notebooks/Block_5/Seleniumtest/GoogleImageExtractor.py +++ /dev/null @@ -1,205 +0,0 @@ -""" Google image Extractor based on Selenium. -taken from: -https://simply-python.com/2015/05/18/saving-images-from-google-search-using-selenium-and-python/ -See also: -https://github.com/scirag/selenium-image-crawler -""" -import re, os, sys, datetime, time -import pandas -from selenium import webdriver -from contextlib import closing -from selenium.webdriver import Firefox -from selenium.webdriver.support.ui import WebDriverWait -from selenium.webdriver.common.by import By -from selenium.webdriver.support import expected_conditions as EC - -# from pattern.web import URL, extension, cache, plaintext, Newsfeed, DOM - -class GoogleImageExtractor(object): - - def __init__(self, search_key = '' ): - """ Google image search class - Args: - search_key to be entered. - - """ - if type(search_key) == str: - ## convert to list even for one search keyword to standalize the pulling. - self.g_search_key_list = [search_key] - elif type(search_key) == list: - self.g_search_key_list = search_key - else: - print('google_search_keyword not of type str or list') - raise - - self.g_search_key = '' - - ## user options - self.image_dl_per_search = 200 - - ## url construct string text - self.prefix_of_search_url = "https://www.google.com.sg/search?q=" - self.postfix_of_search_url = '&source=lnms&tbm=isch&sa=X&ei=0eZEVbj3IJG5uATalICQAQ&ved=0CAcQ_AUoAQ&biw=939&bih=591'# non changable text - self.target_url_str = '' - - ## storage - self.pic_url_list = [] - self.pic_info_list = [] - - ## file and folder path - self.folder_main_dir_prefix = r'C:\data\temp\gimage_pic' - - def reformat_search_for_spaces(self): - """ - Method call immediately at the initialization stages - get rid of the spaces and replace by the "+" - Use in search term. Eg: "Cookie fast" to "Cookie+fast" - - steps: - strip any lagging spaces if present - replace the self.g_search_key - """ - self.g_search_key = self.g_search_key.rstrip().replace(' ', '+') - - def set_num_image_to_dl(self, num_image): - """ Set the number of image to download. Set to self.image_dl_per_search. - Args: - num_image (int): num of image to download. - """ - self.image_dl_per_search = num_image - - def get_searchlist_fr_file(self, filename): - """Get search list from filename. Ability to add in a lot of phrases. - Will replace the self.g_search_key_list - Args: - filename (str): full file path - """ - with open(filename,'r') as f: - self.g_search_key_list = f.readlines() - - def formed_search_url(self): - ''' Form the url either one selected key phrases or multiple search items. - Get the url from the self.g_search_key_list - Set to self.sp_search_url_list - ''' - self.reformat_search_for_spaces() - self.target_url_str = self.prefix_of_search_url + self.g_search_key +\ - self.postfix_of_search_url - - def retrieve_source_fr_html(self): - """ Make use of selenium. Retrieve from html table using pandas table. - - """ - driver = webdriver.Firefox() - driver.get(self.target_url_str) - - ## wait for log in then get the page source. - try: - driver.execute_script("window.scrollTo(0, 30000)") - time.sleep(2) - self.temp_page_source = driver.page_source - #driver.find_element_by_css_selector('ksb _kvc').click()#cant find the class - driver.find_element_by_id('smb').click() #ok - time.sleep(2) - driver.execute_script("window.scrollTo(0, 60000)") - time.sleep(2) - driver.execute_script("window.scrollTo(0, 60000)") - - except: - print('not able to find') - driver.quit() - - self.page_source = driver.page_source - - driver.close() - - def extract_pic_url(self): - """ extract all the raw pic url in list - - """ - dom = DOM(self.page_source) - tag_list = dom('a.rg_l') - - for tag in tag_list[:self.image_dl_per_search]: - tar_str = re.search('imgurl=(.*)&imgrefurl', tag.attributes['href']) - try: - self.pic_url_list.append(tar_str.group(1)) - except: - print('error parsing', tag) - - def multi_search_download(self): - """ Mutli search download""" - for indiv_search in self.g_search_key_list: - self.pic_url_list = [] - self.pic_info_list = [] - - self.g_search_key = indiv_search - - self.formed_search_url() - self.retrieve_source_fr_html() - self.extract_pic_url() - self.downloading_all_photos() #some download might not be jpg?? use selnium to download?? - self.save_infolist_to_file() - - def downloading_all_photos(self): - """ download all photos to particular folder - - """ - self.create_folder() - pic_counter = 1 - for url_link in self.pic_url_list: - print(pic_counter) - pic_prefix_str = self.g_search_key + str(pic_counter) - self.download_single_image(url_link.encode(), pic_prefix_str) - pic_counter = pic_counter +1 - - def download_single_image(self, url_link, pic_prefix_str): - """ Download data according to the url link given. - Args: - url_link (str): url str. - pic_prefix_str (str): pic_prefix_str for unique label the pic - """ - self.download_fault = 0 - file_ext = os.path.splitext(url_link)[1] #use for checking valid pic ext - temp_filename = pic_prefix_str + file_ext - temp_filename_full_path = os.path.join(self.gs_raw_dirpath, temp_filename ) - - valid_image_ext_list = ['.png','.jpg','.jpeg', '.gif', '.bmp', '.tiff'] #not comprehensive - - url = URL(url_link) - if url.redirect: - return # if there is re-direct, return - - if file_ext not in valid_image_ext_list: - return #return if not valid image extension - - f = open(temp_filename_full_path, 'wb') # save as test.gif - print(url_link) - self.pic_info_list.append(pic_prefix_str + ': ' + url_link ) - try: - f.write(url.download())#if have problem skip - except: - #if self.__print_download_fault: - print('Problem with processing this data: ', url_link) - self.download_fault =1 - f.close() - - def create_folder(self): - """ - Create a folder to put the log data segregate by date - - """ - self.gs_raw_dirpath = os.path.join(self.folder_main_dir_prefix, time.strftime("_%d_%b%y", time.localtime())) - if not os.path.exists(self.gs_raw_dirpath): - os.makedirs(self.gs_raw_dirpath) - - def save_infolist_to_file(self): - """ Save the info list to file. - - """ - temp_filename_full_path = os.path.join(self.gs_raw_dirpath, self.g_search_key + '_info.txt' ) - - with open(temp_filename_full_path, 'w') as f: - for n in self.pic_info_list: - f.write(n) - f.write('\n') \ No newline at end of file diff --git a/notebooks/Block_5/Seleniumtest/Selenium_test.ipynb b/notebooks/Block_5/Seleniumtest/Selenium_test.ipynb deleted file mode 100644 index 6a1f410c3c9f9f0a575336593265977de709614c..0000000000000000000000000000000000000000 --- a/notebooks/Block_5/Seleniumtest/Selenium_test.ipynb +++ /dev/null @@ -1,304 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Selenium using Firefox webdriver\n", - "Somehow also refers to/looks for geckodrive" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "ename": "WebDriverException", - "evalue": "Message: Process unexpectedly closed with status 1\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mWebDriverException\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-8-3f533b936151>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;31m# driver = webdriver.Geckodriver()\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0mdriver\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFirefox\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexecutable_path\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'/usr/bin/geckodriver'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/firefox/webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, firefox_profile, firefox_binary, timeout, capabilities, proxy, executable_path, options, service_log_path, firefox_options, service_args, desired_capabilities, log_path, keep_alive)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0mcommand_executor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mexecutor\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 173\u001b[0m \u001b[0mdesired_capabilities\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcapabilities\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 174\u001b[0;31m keep_alive=True)\n\u001b[0m\u001b[1;32m 175\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0;31m# Selenium remote\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, command_executor, desired_capabilities, browser_profile, proxy, keep_alive, file_detector, options)\u001b[0m\n\u001b[1;32m 155\u001b[0m warnings.warn(\"Please use FirefoxOptions to set browser profile\",\n\u001b[1;32m 156\u001b[0m DeprecationWarning, stacklevel=2)\n\u001b[0;32m--> 157\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart_session\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcapabilities\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbrowser_profile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 158\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_switch_to\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSwitchTo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mobile\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMobile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mstart_session\u001b[0;34m(self, capabilities, browser_profile)\u001b[0m\n\u001b[1;32m 250\u001b[0m parameters = {\"capabilities\": w3c_caps,\n\u001b[1;32m 251\u001b[0m \"desiredCapabilities\": capabilities}\n\u001b[0;32m--> 252\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNEW_SESSION\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 253\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'sessionId'\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'value'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, driver_command, params)\u001b[0m\n\u001b[1;32m 319\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 321\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 322\u001b[0m response['value'] = self._unwrap_value(\n\u001b[1;32m 323\u001b[0m response.get('value', None))\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[0malert_text\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'alert'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'text'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 242\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 243\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mWebDriverException\u001b[0m: Message: Process unexpectedly closed with status 1\n" - ] - } - ], - "source": [ - "# Problem with getting this webdriver in current path or installed at all for that matter, could try to just copy the file into gitlab\n", - "# https://medium.com/cs-note/web-crawling-by-using-selenium-python-3-4fff0bdb4c65\n", - "from selenium import webdriver\n", - "from selenium.webdriver.common.by import By\n", - "from selenium.webdriver.common.keys import Keys\n", - "from selenium.webdriver.support.ui import Select\n", - "from selenium.common.exceptions import NoSuchElementException\n", - "from selenium.common.exceptions import NoAlertPresentException\n", - "import unittest, time, re\n", - "\n", - "# driver = webdriver.Geckodriver()\n", - "driver = webdriver.Firefox(executable_path='/usr/bin/geckodriver')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Selenium defining Driver path\n", - "Divines driver path, means could be both i assume. \n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "ename": "WebDriverException", - "evalue": "Message: Process unexpectedly closed with status 1\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mWebDriverException\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-6-29c58177a12a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0mservice\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mService\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDRIVER_PATH\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mservice\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0mwd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRemote\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mservice\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mservice_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0mwd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, command_executor, desired_capabilities, browser_profile, proxy, keep_alive, file_detector, options)\u001b[0m\n\u001b[1;32m 155\u001b[0m warnings.warn(\"Please use FirefoxOptions to set browser profile\",\n\u001b[1;32m 156\u001b[0m DeprecationWarning, stacklevel=2)\n\u001b[0;32m--> 157\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart_session\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcapabilities\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbrowser_profile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 158\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_switch_to\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSwitchTo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mobile\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMobile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mstart_session\u001b[0;34m(self, capabilities, browser_profile)\u001b[0m\n\u001b[1;32m 250\u001b[0m parameters = {\"capabilities\": w3c_caps,\n\u001b[1;32m 251\u001b[0m \"desiredCapabilities\": capabilities}\n\u001b[0;32m--> 252\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNEW_SESSION\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 253\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'sessionId'\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'value'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, driver_command, params)\u001b[0m\n\u001b[1;32m 319\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 321\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 322\u001b[0m response['value'] = self._unwrap_value(\n\u001b[1;32m 323\u001b[0m response.get('value', None))\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[0malert_text\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'alert'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'text'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 242\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 243\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mWebDriverException\u001b[0m: Message: Process unexpectedly closed with status 1\n" - ] - } - ], - "source": [ - "# Same problem, driver needs to be in the path somehow. \n", - "# https://medium.com/swlh/web-scraping-stock-images-using-google-selenium-and-python-8b825ba649b9\n", - "import selenium\n", - "from selenium import webdriver\n", - "from selenium.webdriver.chrome.service import Service\n", - "# DRIVER_PATH = '/home/work/hslu-deep-learning/notebooks/Block_5/Seleniumtest/geckodriver'\n", - "DRIVER_PATH = '/usr/bin/geckodriver'\n", - "\n", - "service = Service(DRIVER_PATH)\n", - "service.start()\n", - "wd = webdriver.Remote(service.service_url)\n", - "wd.quit()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://pixel.quantserve.com/pixel/p-f8oruOqDFlMeI.gif\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-new-post.13ab64f9f36ad8f25ae3544b350e2ae1.svg\n", - "https://s.imgur.com/images/favicon-32x32.png\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-search.8d0f9b564a4659d48d8eca38b968a7f2.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-emerald-hero.01efdc1ccdadae307801b620cfa17756.png\n", - "https://s.imgur.com/desktop-assets/desktop-assets/avatar-peanut-butter.9e588caafea04959912c1e048ba87d7f.png\n", - "https://s.imgur.com/desktop-assets/desktop-assets/avatar-toaster.b497f4c2ec340d8ec112674228cf1e78.png\n", - "https://s.imgur.com/desktop-assets/desktop-assets/avatar-alien.1c7563678882fbd38fa859a0136cac7d.png\n", - "https://s.imgur.com/images/favicon-32x32.png\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-pause.68f07ce1a7e07bac06d1f2c527d7a9e5.svg\n", - "https://i.imgur.com/YPfpL7S_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/dC6K5SR_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/Pe9nYNF_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/OuGFt1z_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/Clmc0LC_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/8DFxOOv_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/i41PPf0_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://imgur.com/[object%20Object]\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/FHOSnBD_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/U8r1Jh4_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://imgur.com/[object%20Object]\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://imgur.com/[object%20Object]\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://imgur.com/[object%20Object]\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/PeWnuOC_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/R9rehQX_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/50n98gu_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/xuTnHTc_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://imgur.com/[object%20Object]\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/EmkImEw_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/bEJM71d_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/ed17rXR_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/RNZpTJm_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://imgur.com/[object%20Object]\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/cHEkDQY_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/JbqeNVT_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://s.imgur.com/images/accolades/gem.png\n", - "https://i.imgur.com/XlhvEPH_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/Tcgf1MX_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://i.imgur.com/5mLrcgO_d.webp?maxwidth=520&shape=thumb&fidelity=high\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-chat-filled.b12fed0067e4ce444f710411ee6a70e1.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-eye.bf703aed21e47a30269da39879c03ccf.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/baby-yoda.37513d23dae2853e3270ffbf0f262563.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-close-outline.3f046db44f7214dba26a6218e3250e44.svg\n", - "https://s.imgur.com/desktop-assets/desktop-assets/icon-points.5867bfb88971853dcfdf49b18f8455b9.svg\n" - ] - } - ], - "source": [ - "from selenium import webdriver\n", - "\n", - "\n", - "options = webdriver.FirefoxOptions()\n", - "options.add_argument('--ignore-certificate-errors')\n", - "options.add_argument(\"--test-type\")\n", - "options.add_argument('--headless')\n", - "options.executable_path = \"/usr/bin/geckodriver\"\n", - "driver = webdriver.Firefox(options=options)\n", - "\n", - "driver.get('https://imgur.com/')\n", - "\n", - "images = driver.find_elements_by_tag_name('img')\n", - "for image in images:\n", - " print(image.get_attribute('src'))\n", - "\n", - "driver.close()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Following Medium :\n", - "https://medium.com/@igorzabukovec/automate-web-crawling-with-selenium-python-part-1-85113660de96" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from selenium import webdriver\n", - "from selenium.webdriver.firefox.options import Options\n", - "\n", - "options = webdriver.FirefoxOptions()\n", - "options.add_argument('--headless')\n", - "\n", - "driver = webdriver.Firefox(options=options, executable_path=\"/usr/bin/geckodriver\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Following towards data science:\n", - "https://towardsdatascience.com/image-scraping-with-python-a96feda8af2d" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "ename": "WebDriverException", - "evalue": "Message: Process unexpectedly closed with status 1\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mWebDriverException\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-4-a8e68fd1e84e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mDRIVER_PATH\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"/usr/bin/geckodriver\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mwd\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mFirefox\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexecutable_path\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDRIVER_PATH\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/firefox/webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, firefox_profile, firefox_binary, timeout, capabilities, proxy, executable_path, options, service_log_path, firefox_options, service_args, desired_capabilities, log_path, keep_alive)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0mcommand_executor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mexecutor\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 173\u001b[0m \u001b[0mdesired_capabilities\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcapabilities\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 174\u001b[0;31m keep_alive=True)\n\u001b[0m\u001b[1;32m 175\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[0;31m# Selenium remote\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, command_executor, desired_capabilities, browser_profile, proxy, keep_alive, file_detector, options)\u001b[0m\n\u001b[1;32m 155\u001b[0m warnings.warn(\"Please use FirefoxOptions to set browser profile\",\n\u001b[1;32m 156\u001b[0m DeprecationWarning, stacklevel=2)\n\u001b[0;32m--> 157\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart_session\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcapabilities\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbrowser_profile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 158\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_switch_to\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mSwitchTo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 159\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mobile\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMobile\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mstart_session\u001b[0;34m(self, capabilities, browser_profile)\u001b[0m\n\u001b[1;32m 250\u001b[0m parameters = {\"capabilities\": w3c_caps,\n\u001b[1;32m 251\u001b[0m \"desiredCapabilities\": capabilities}\n\u001b[0;32m--> 252\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCommand\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNEW_SESSION\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 253\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'sessionId'\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 254\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'value'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py\u001b[0m in \u001b[0;36mexecute\u001b[0;34m(self, driver_command, params)\u001b[0m\n\u001b[1;32m 319\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcommand_executor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdriver_command\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparams\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 321\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merror_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcheck_response\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 322\u001b[0m response['value'] = self._unwrap_value(\n\u001b[1;32m 323\u001b[0m response.get('value', None))\n", - "\u001b[0;32m/opt/conda/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py\u001b[0m in \u001b[0;36mcheck_response\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[0malert_text\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'alert'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'text'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malert_text\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 242\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mexception_class\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscreen\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstacktrace\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 243\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_value_or_default\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mWebDriverException\u001b[0m: Message: Process unexpectedly closed with status 1\n" - ] - } - ], - "source": [ - "from selenium import webdriver\n", - "\n", - "DRIVER_PATH = \"/usr/bin/geckodriver\"\n", - "wd = webdriver.Firefox(executable_path=DRIVER_PATH)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/Block_5/Seleniumtest/backup/chromedriver.exe b/notebooks/Block_5/Seleniumtest/backup/chromedriver.exe deleted file mode 100755 index 6ee2a44f820cac1b5d8b4a067ec15c5298e4b563..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/Seleniumtest/backup/chromedriver.exe and /dev/null differ diff --git a/notebooks/Block_5/Seleniumtest/backup/chromedriver_linux64.zip b/notebooks/Block_5/Seleniumtest/backup/chromedriver_linux64.zip deleted file mode 100644 index c3a888c952398cc8dde783d4ca3db3426f1a9743..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/Seleniumtest/backup/chromedriver_linux64.zip and /dev/null differ diff --git a/notebooks/Block_5/Seleniumtest/backup/geckodriver-v0.24.0-linux64.tar.gz b/notebooks/Block_5/Seleniumtest/backup/geckodriver-v0.24.0-linux64.tar.gz deleted file mode 100644 index f85fb6032ec71cb59cc0a802229ba6271222ad8c..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/Seleniumtest/backup/geckodriver-v0.24.0-linux64.tar.gz and /dev/null differ diff --git a/notebooks/Block_5/Seleniumtest/backup/geckodriver.exe b/notebooks/Block_5/Seleniumtest/backup/geckodriver.exe deleted file mode 100755 index bac836bb55f7bec9306c70d7be0e223cef468e3e..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/Seleniumtest/backup/geckodriver.exe and /dev/null differ diff --git a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_1.jpg b/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_1.jpg deleted file mode 100644 index e6b5054a20b0d6ef9ed2d9a10d475a267d5927f7..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_1.jpg and /dev/null differ diff --git a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_2.jpg b/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_2.jpg deleted file mode 100644 index c6ca844c28ec7741d6aaf616acff6f43bd530529..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_2.jpg and /dev/null differ diff --git a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_3.jpg b/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_3.jpg deleted file mode 100644 index 30616e7c13be35b56ac8fcd5829a7dedc1ab3d84..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_3.jpg and /dev/null differ diff --git a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_4.jpg b/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_4.jpg deleted file mode 100644 index d2a958f032f6a8b7382b41fd3241444343b01f09..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_4.jpg and /dev/null differ diff --git a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_5.jpg b/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_5.jpg deleted file mode 100644 index 69b697998092613593819cc50e9496bb39b8db0c..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_5.jpg and /dev/null differ diff --git a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_6.jpg b/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_6.jpg deleted file mode 100644 index ba825c969c5153b8d577871097de0b945b2de2d9..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_6.jpg and /dev/null differ diff --git a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_7.jpg b/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_7.jpg deleted file mode 100644 index 0c4285587b665d59a1f40f0ae2d8e51167694f79..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_7.jpg and /dev/null differ diff --git a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_8.jpg b/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_8.jpg deleted file mode 100644 index 03b9fc926e7aa295531b391a60944a2b17df8bb1..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_8.jpg and /dev/null differ diff --git a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_9.jpg b/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_9.jpg deleted file mode 100644 index 8b217f9febfe8d6ec9451ffa37b8bb1e183d599a..0000000000000000000000000000000000000000 Binary files a/notebooks/Block_5/brandnew_images/train/brad pitt/brad pitt_9.jpg and /dev/null differ