💊

Web scraping Selenium Chrome PowerShell (kakaku.com)

2022/05/24に公開
  • Search on kakaku.com
  • Get Results

selenium-dotnet-*.zip (例: selenium-dotnet-4.1.0.zip)
https://github.com/SeleniumHQ/selenium/releases/tag/selenium-4.1.0

  • WebDriver.dll
  • WebDriver.Support.dll

Selenium.WebDriver.4.1.1.nupkg
↓ 拡張子変更
Selenium.WebDriver.4.1.1.zip

Selenium.WebDriver.4.1.1.zip\lib\net48\WebDriver.dll → D:\Selenium4\WebDriver.dll 保存

Selenium.Support.4.1.1.nupkg
↓ 拡張子変更
Selenium.Support.4.1.1.zip

Selenium.Support.4.1.1.zip\lib\net48\WebDriver.Support.dll → D:\Selenium4\WebDriver.Support.dll 保存

https://chromedriver.chromium.org/

  • chromedriver.exe → D:\Selenium4\chromedriver.exe 保存

powershell
# Empty the automatic error list so that the $ERROR[0] check at the end of
# this setup section only shows errors raised from here on.
$ERROR.Clear()

# Load the Selenium WebDriver assemblies.
Add-Type -Path 'D:\Selenium4\WebDriver.dll'
Add-Type -Path 'D:\Selenium4\WebDriver.Support.dll'

# Path handed to ChromeDriverService.CreateDefaultService() below.
$chromedriver_path = 'D:\Selenium4'

# Define the WebDriver (Chrome) options.
$options = New-Object -TypeName OpenQA.Selenium.Chrome.ChromeOptions
# Suppress the --ignore-certificate-error message.
$options.AddArgument('test-type')
# Disable browser extensions.
$options.AddArgument('--disable-extensions')
# Open the window maximized.
$options.AddArgument('--start-maximized')

# Create the chromedriver service and keep its command prompt window hidden.
$chromeService = [OpenQA.Selenium.Chrome.ChromeDriverService]::CreateDefaultService($chromedriver_path)
$chromeService.HideCommandPromptWindow = $true

# Launch Chrome through the service with the options defined above.
$driver = New-Object -TypeName OpenQA.Selenium.Chrome.ChromeDriver -ArgumentList $chromeService, $options

# Configure $wait: explicit wait bound to $driver with a 10 second timeout.
[OpenQA.Selenium.Support.UI.WebDriverWait]$wait = New-Object -TypeName OpenQA.Selenium.Support.UI.WebDriverWait -ArgumentList $driver, ([System.TimeSpan]::FromSeconds(10))

# Emit the chromedriver process id, then the most recent error (if any).
"ProcessId:$($chromeService.ProcessId)"
$ERROR[0]

# Open the kakaku.com top page.
$driver.Navigate().GoToUrl("http://kakaku.com/")

# Block until the document root can be located. The predicate yields the
# found element (converted to $true) or, when the lookup throws, $null
# (converted to $false), in which case $wait keeps polling until its timeout.
$xpath = '/html'
$wait.Until([System.Func[OpenQA.Selenium.IWebDriver, System.Boolean]] { param($driver) Try { $driver.FindElement([OpenQA.Selenium.By]::XPath($xpath)) } Catch { $null } })

# Type the search phrase into the search box and submit it.
# SendKeys() and Click() return nothing, so their results are not stored
# (the original assigned them to an $element variable that was never an element).
$driver.FindElement([OpenQA.Selenium.By]::Id('query')).SendKeys("ノイズキャンセリングイヤホン")
$driver.FindElement([OpenQA.Selenium.By]::Id('main_search_button')).Click()

# Block until at least one search-result row is present on the results page.
$wait.Until([System.Func[OpenQA.Selenium.IWebDriver, System.Boolean]] { param($driver) Try { $driver.FindElement([OpenQA.Selenium.By]::ClassName('p-result_item_row')) } Catch { $null } })

# Snapshot of every search-result row currently in the page.
$elements = @($driver.FindElements([OpenQA.Selenium.By]::ClassName('p-result_item_row')))
$count = $elements.Count
$objAry = New-Object System.Collections.Generic.List[Object]

# Returns attribute $Attribute of the $Index-th element matching $By below
# $Context (a driver or an element), or $null when there is no such element.
# Without this guard a missing element makes the whole "$item = ..." statement
# fail, $item keeps the previous iteration's value, and that previous product
# is added to the result list a second time.
function Get-AttributeAt {
    param($Context, $By, [int]$Index, [string]$Attribute)

    $found = @($Context.FindElements($By))
    if ($Index -lt 0 -or $Index -ge $found.Count) {
        return $null
    }
    return $found[$Index].GetAttribute($Attribute)
}

# Locators are loop-invariant, so build them once.
$byLink  = [OpenQA.Selenium.By]::TagName('a')
$byName  = [OpenQA.Selenium.By]::ClassName('p-item_name')
$byLines = [OpenQA.Selenium.By]::ClassName('p-item_lines')
$byPrice = [OpenQA.Selenium.By]::ClassName('p-item_price_num')
$byImage = [OpenQA.Selenium.By]::ClassName('p-item_visual_entity')

for ($i = 0; $i -lt $count; $i++) {
    $row = $elements[$i]

    # Bring the row into view before reading it; the script's return value is not needed.
    # NOTE(review): presumably this is what makes lazily loaded image "src" values available - confirm.
    [void]$driver.ExecuteScript('arguments[0].scrollIntoView({behavior: "smooth", block: "center"});', $row)

    # The URL is the first link inside the row. The remaining fields are
    # paired with the row by their position in the document-wide list of
    # elements carrying the respective class, as in the original script.
    # [ordered] keeps the keys in this order in the JSON output.
    $item = [ordered]@{
        "url"         = (Get-AttributeAt $row    $byLink  0  "href")
        "title"       = (Get-AttributeAt $driver $byName  $i "innerText")
        "description" = (Get-AttributeAt $driver $byLines $i "innerText")
        "minPrice"    = (Get-AttributeAt $driver $byPrice $i "innerText")
        "imgSRC"      = (Get-AttributeAt $driver $byImage $i "src")
    }
    $objAry.Add($item)
}

# Emit the collected products as JSON.
ConvertTo-Json $objAry

Discussion