💊

Web scraping Selenium Chrome PowerShell (Amazon)

2022/05/22に公開
  • Search on Amazon
  • Get Results

selenium-dotnet-*.*.*.zip
https://github.com/SeleniumHQ/selenium/releases/tag/selenium-4.1.0

  • WebDriver.dll
  • WebDriver.Support.dll

Selenium.WebDriver.4.1.1.nupkg
↓ 拡張子変更
Selenium.WebDriver.4.1.1.zip

Selenium.WebDriver.4.1.1.zip\lib\net48\WebDriver.dll → D:\Selenium\WebDriver.dll に保存

Selenium.Support.4.1.1.nupkg
↓ 拡張子変更
Selenium.Support.4.1.1.zip

Selenium.Support.4.1.1.zip\lib\net48\WebDriver.Support.dll → D:\Selenium\WebDriver.Support.dll に保存

https://chromedriver.chromium.org/

  • chromedriver.exe（インストール済みの Chrome と同じバージョン）→ D:\Selenium\chromedriver.exe に保存

powershell
# Start a Chrome session through Selenium and prepare a reusable explicit wait.
# Leaves $driver (the browser session) and $wait (a WebDriverWait) for the steps that follow.

# Empty the automatic error list so that $ERROR[0] printed at the end of this
# section can only come from the statements below.
$ERROR.Clear()

# Load the Selenium .NET assemblies.
'D:\Selenium\WebDriver.dll', 'D:\Selenium\WebDriver.Support.dll' | ForEach-Object { Add-Type -Path $_ }

# Folder handed to ChromeDriverService as the driver location.
$chromedriver_path = 'D:\Selenium'

# Define the WebDriver (Chrome) options.
$options = New-Object -TypeName OpenQA.Selenium.Chrome.ChromeOptions
$chromeArguments = @(
    'test-type',             # meant to suppress the "--ignore-certificate-error" output
    '--disable-extensions',  # disable browser extensions
    '--start-maximized'      # open the window maximized
)
ForEach ($chromeArgument in $chromeArguments) {
    $options.AddArgument($chromeArgument)
}

# Create the driver service with its command prompt window hidden, then the browser session.
$chromeService = [OpenQA.Selenium.Chrome.ChromeDriverService]::CreateDefaultService($chromedriver_path)
$chromeService.HideCommandPromptWindow = $true
$driver = New-Object -TypeName OpenQA.Selenium.Chrome.ChromeDriver -ArgumentList $chromeService, $options

# Configure $wait: an explicit wait bound to $driver with a 10-second timeout.
$waitTimeout = [System.TimeSpan]::FromSeconds(10)
[OpenQA.Selenium.Support.UI.WebDriverWait]$wait = New-Object -TypeName OpenQA.Selenium.Support.UI.WebDriverWait -ArgumentList $driver, $waitTimeout

# Report the driver service process id and the first error recorded during setup (if any).
'ProcessId:' + $chromeService.ProcessId
$ERROR[0]

# Open the Amazon Japan top page.
$driver.Navigate().GoToUrl('https://www.amazon.co.jp/')

# Poll through $wait until the root <html> element can be located.
# The predicate yields the found element (truthy) or $null (falsy) when the lookup throws;
# Until() writes its Boolean result to the output stream.
$xpath = '/html'
$htmlIsPresent = [System.Func[OpenQA.Selenium.IWebDriver, System.Boolean]] {
    param($driver)
    Try {
        $driver.FindElement([OpenQA.Selenium.By]::XPath($xpath))
    } Catch {
        $null
    }
}
$wait.Until($htmlIsPresent)


# Type the search phrase into the search box (cleared first).
$searchBoxLocator = [OpenQA.Selenium.By]::Id('twotabsearchtextbox')
$element = $driver.FindElement($searchBoxLocator)
$element.Clear()
$element.SendKeys('アクティブノイズキャンセリング イヤホン')

# Click the search submit element to run the search.
$submitLocator = [OpenQA.Selenium.By]::Id('nav-search-submit-text')
$element = $driver.FindElement($submitLocator)
$element.Click()



<#
  Read the title, price, link and image URL of the first search result.

  Uses $driver (the browser session) and $wait (the WebDriverWait) created earlier in
  this script. Leaves every result card in $elements and writes, in order: the first
  card's <h2> text, its 'a-price-whole' text, its first link href, its first image src,
  and finally the card element itself.

  The results page may still be loading right after the search is submitted, so this
  first waits until at least one result card exists. Missing cards or missing fields
  produce a warning instead of a null-method-call error.
#>
$resultLocator = [OpenQA.Selenium.By]::XPath('//div[@data-component-type="s-search-result"]');

Try {
    <# FindElements returns an empty collection (no exception) while nothing matches yet. #>
    [void]$wait.Until([System.Func[OpenQA.Selenium.IWebDriver, System.Boolean]] { param($d) @($d.FindElements($resultLocator)).Count -gt 0 });
} Catch {
    Write-Warning ('Search results did not appear: ' + $_.Exception.Message);
}

$elements = @($driver.FindElements($resultLocator));

<# Returns the named attribute of the first descendant of $parent matching $by,
   or warns and returns nothing when no descendant matches. #>
$getFirstAttribute = {
    param($parent, $by, $attributeName)
    $found = @($parent.FindElements($by));
    If ($found.Count -gt 0) { $found[0].GetAttribute($attributeName) } Else { Write-Warning ('No element found for: ' + $by.ToString()) }
};

If ($elements.Count -eq 0) {
    Write-Warning 'No search result elements were found on the page.';
} Else {
    & $getFirstAttribute $elements[0] ([OpenQA.Selenium.By]::TagName('h2')) 'innerText';
    & $getFirstAttribute $elements[0] ([OpenQA.Selenium.By]::ClassName('a-price-whole')) 'innerText';
    & $getFirstAttribute $elements[0] ([OpenQA.Selenium.By]::TagName('a')) 'href';
    & $getFirstAttribute $elements[0] ([OpenQA.Selenium.By]::TagName('img')) 'src';

    $elements[0];
}

Discussion