🐡
【Go標準ライブラリ】スクレイピング結果をCSVに変換
内容
Goのnet/httpとファイルの読み書き(read/write)周りの動作を把握するために、簡単なコードを書いて確認しました。
ZennのGolangのトレンド記事をCSVで取得するコードです。
memo
package main
import (
"encoding/csv"
"encoding/json"
"fmt"
"net/http"
"os"
"time"
"go.uber.org/zap"
)
// logger is the package-wide zap development logger. The error returned by
// zap.NewDevelopment is discarded.
var logger, _ = zap.NewDevelopment()

// pHost is the scheme and host of Zenn. It is prefixed to the API path when
// fetching articles and to each article path when building CSV rows.
var pHost = "https://zenn.dev"
// Articles is the JSON envelope decoded from the Zenn articles API response
// and from the articles.json cache file.
type Articles struct {
// Articles holds the entries found under the "articles" key.
Articles []Article `json:"articles"`
// NextPage holds the raw "next_page" value; no concrete type is fixed for it
// here, and nothing in this file reads it.
NextPage any `json:"next_page"`
}
// Article is one element of the "articles" array. Each field is mapped to the
// JSON key named in its tag. Only Title, LikedCount, Path and User.Username
// are used when building CSV rows (see articlesRecords); the remaining fields
// are decoded and written back to the JSON cache unchanged.
type Article struct {
ID int `json:"id"`
PostType string `json:"post_type"`
Title string `json:"title"`
Slug string `json:"slug"`
CommentsCount int `json:"comments_count"`
LikedCount int `json:"liked_count"`
BodyLettersCount int `json:"body_letters_count"`
ArticleType string `json:"article_type"`
Emoji string `json:"emoji"`
IsSuspendingPrivate bool `json:"is_suspending_private"`
// PublishedAt and BodyUpdatedAt are decoded into time.Time by encoding/json.
PublishedAt time.Time `json:"published_at"`
BodyUpdatedAt time.Time `json:"body_updated_at"`
// SourceRepoUpdatedAt is kept as any; presumably a nullable timestamp —
// TODO confirm against the API.
SourceRepoUpdatedAt any `json:"source_repo_updated_at"`
Pinned bool `json:"pinned"`
// Path is appended to pHost to form the article link in the CSV output.
Path string `json:"path"`
User User `json:"user"`
// Publication is kept as any; its shape is not modelled in this file.
Publication any `json:"publication"`
}
// User is the object decoded from the "user" key of an Article. Username is
// the only field used when building CSV rows.
type User struct {
ID int `json:"id"`
Username string `json:"username"`
Name string `json:"name"`
AvatarSmallURL string `json:"avatar_small_url"`
}
// main loads the article list from the articles.json cache, falling back to
// the Zenn API (and refreshing the cache) when the cache cannot be read, and
// then writes the articles to articles.csv. Any failure terminates the
// process with a non-zero exit status.
func main() {
	if err := run(); err != nil {
		// The helper that failed has already logged the error.
		os.Exit(1)
	}
}

// run performs the work of main and returns the first error encountered.
func run() error {
	articles := Articles{}
	if err := readJsonFromFile("articles.json", &articles); err != nil {
		// The cache is missing or unreadable: fetch fresh data and store it
		// so that the next run can skip the network request.
		articles, err = getZennArticles()
		if err != nil {
			return err
		}
		if err = saveJsonToFile("articles.json", articles); err != nil {
			return err
		}
	}
	return saveCSVToFile("articles.csv", articlesRecords(articles))
}
// getZennArticles requests the first page of daily-ordered articles for the
// "go" topic from the Zenn API and decodes the JSON response body.
//
// It logs and returns an error when the request fails, when the server
// answers with a status other than 200 OK, or when the body cannot be
// decoded. The returned Articles value is empty whenever the error is non-nil.
func getZennArticles() (Articles, error) {
	// http.Get uses the default client, which has no timeout; bound the
	// request so that a stalled server cannot hang the program.
	client := &http.Client{Timeout: 30 * time.Second}

	res, err := client.Get(pHost + "/api/articles?topicname=go&order=daily&page=1")
	if err != nil {
		logger.Error("error", zap.Error(err))
		return Articles{}, err
	}
	defer res.Body.Close()

	// Reject error responses before decoding: an error body would otherwise
	// surface as a confusing decode error or as an empty article list.
	if res.StatusCode != http.StatusOK {
		err := fmt.Errorf("fetching zenn articles: unexpected status %s", res.Status)
		logger.Error("error", zap.Error(err))
		return Articles{}, err
	}

	var articles Articles
	if err := json.NewDecoder(res.Body).Decode(&articles); err != nil {
		logger.Error("error", zap.Error(err))
		return Articles{}, err
	}

	logger.Info("get articles", zap.Int("count", len(articles.Articles)))
	return articles, nil
}
// saveJsonToFile creates (or truncates) fileName and writes v to it as JSON.
//
// It logs and returns an error when the file cannot be created, when v cannot
// be encoded, or when closing the file fails. The file is closed on every path.
func saveJsonToFile(fileName string, v any) error {
	f, err := os.Create(fileName)
	if err != nil {
		logger.Error("error", zap.Error(err))
		return err
	}

	if err := json.NewEncoder(f).Encode(v); err != nil {
		// Release the descriptor; the encode error is the one worth reporting.
		f.Close()
		logger.Error("error", zap.Error(err))
		return err
	}

	// For a written file, Close can report a write error that has not been
	// surfaced yet, so it is checked explicitly instead of being deferred.
	if err := f.Close(); err != nil {
		logger.Error("error", zap.Error(err))
		return err
	}

	logger.Info("save json to file", zap.String("file", fileName))
	return nil
}
// readJsonFromFile opens fileName and decodes its JSON content into v, which
// must be a pointer accepted by encoding/json.
//
// It returns an error when the file cannot be opened or its content cannot be
// decoded; both cases are logged with the underlying cause. The file is closed
// on every path.
func readJsonFromFile(fileName string, v any) error {
	f, err := os.Open(fileName)
	if err != nil {
		// A missing cache file is an expected situation, hence Info level.
		logger.Info("not found json file", zap.String("file", fileName), zap.Error(err))
		return err
	}
	// Register the close before decoding so a decode failure cannot leak f.
	defer f.Close()

	if err := json.NewDecoder(f).Decode(v); err != nil {
		// The file exists but is empty or malformed; say so rather than
		// reporting it as "not found".
		logger.Warn("invalid json file", zap.String("file", fileName), zap.Error(err))
		return err
	}

	logger.Info("read json from file", zap.String("file", fileName))
	return nil
}
// articlesRecords turns the article list into CSV rows. The first row is the
// header; every following row carries one article's title, author username,
// like count and absolute URL (pHost joined with the article path).
func articlesRecords(articles Articles) [][]string {
	header := []string{"title", "user_name", "like_count", "path"}

	rows := make([][]string, 0, len(articles.Articles)+1)
	rows = append(rows, header)

	for i := range articles.Articles {
		a := &articles.Articles[i]
		likes := fmt.Sprint(a.LikedCount)
		link := pHost + a.Path
		rows = append(rows, []string{a.Title, a.User.Username, likes, link})
	}
	return rows
}
// saveCSVToFile appends articlesRecords to fileName as CSV, creating the file
// with mode 0644 when it does not exist.
//
// Because the file is opened with O_APPEND, existing content is kept and the
// rows passed in (including any header row) are added after it on every call.
//
// It logs and returns an error when the file cannot be opened, when writing
// fails, or when closing the file fails.
func saveCSVToFile(fileName string, articlesRecords [][]string) error {
	f, err := os.OpenFile(fileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		logger.Error("error", zap.Error(err))
		return err
	}

	// WriteAll flushes the writer itself and returns any flush error, so no
	// separate Flush call is needed.
	if err := csv.NewWriter(f).WriteAll(articlesRecords); err != nil {
		// Release the descriptor; the write error is the one worth reporting.
		f.Close()
		logger.Error("error", zap.Error(err))
		return err
	}

	// For a written file, Close can report a write error that has not been
	// surfaced yet, so it is checked explicitly instead of being deferred.
	if err := f.Close(); err != nil {
		logger.Error("error", zap.Error(err))
		return err
	}

	logger.Info("save csv to file", zap.String("file", fileName))
	return nil
}
Discussion