🐡

【Go標準ライブラリ】スクレイピング結果をCSVに変換

2023/09/18に公開

内容

Goのnet/httpと、ファイルの読み書き(read/write)周りの動作を把握するために、簡単なコードを書いて確認しました。
ZennのGoトピックのトレンド記事をAPIから取得し、CSVファイルとして保存するコードです。

memo

package main

import (
	"encoding/csv"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
	"time"

	"go.uber.org/zap"
)

var (
	// logger is the development-mode zap logger shared by every function in
	// this file. If it cannot be constructed, the failure is reported on
	// stderr and a no-op logger is used instead, so later logging calls are
	// made on a valid logger rather than on a nil *zap.Logger.
	logger = func() *zap.Logger {
		l, err := zap.NewDevelopment()
		if err != nil {
			fmt.Fprintln(os.Stderr, "zap logger unavailable, logging disabled:", err)
			return zap.NewNop()
		}
		return l
	}()

	// pHost is the scheme and host prepended to the API path and to each
	// article path when building URLs.
	pHost = "https://zenn.dev"
)

// Articles is the top-level JSON object that getZennArticles decodes from the
// API response and that is cached to / restored from articles.json.
//
// Articles holds the entries under the "articles" key. NextPage holds the
// "next_page" value as an untyped any; the visible code never reads it.
type Articles struct {
	Articles []Article `json:"articles"`
	NextPage any       `json:"next_page"`
}

// Article is one element of the "articles" array. Each field is mapped to the
// JSON key named in its struct tag.
//
// Only Title, LikedCount, Path and User.Username are consumed by the CSV
// conversion in this file; the remaining fields are decoded and written back
// to the JSON cache unchanged. SourceRepoUpdatedAt and Publication are typed
// any, so whatever JSON value arrives for them is kept as-is.
type Article struct {
	ID                  int       `json:"id"`
	PostType            string    `json:"post_type"`
	Title               string    `json:"title"`
	Slug                string    `json:"slug"`
	CommentsCount       int       `json:"comments_count"`
	LikedCount          int       `json:"liked_count"`
	BodyLettersCount    int       `json:"body_letters_count"`
	ArticleType         string    `json:"article_type"`
	Emoji               string    `json:"emoji"`
	IsSuspendingPrivate bool      `json:"is_suspending_private"`
	PublishedAt         time.Time `json:"published_at"`
	BodyUpdatedAt       time.Time `json:"body_updated_at"`
	SourceRepoUpdatedAt any       `json:"source_repo_updated_at"`
	Pinned              bool      `json:"pinned"`
	Path                string    `json:"path"`
	User                User      `json:"user"`
	Publication         any       `json:"publication"`
}

// User is the object stored under an article's "user" key. Username is the
// value written to the "user_name" CSV column; the other fields are only
// decoded and re-encoded.
type User struct {
	ID             int    `json:"id"`
	Username       string `json:"username"`
	Name           string `json:"name"`
	AvatarSmallURL string `json:"avatar_small_url"`
}

// main writes articles.csv and exits with status 1 when any step fails, so
// that shells and scripts can detect the failure.
func main() {
	if err := run(); err != nil {
		// The failing step has already logged the details; only the exit
		// status needs to be set here.
		os.Exit(1)
	}
}

// run loads the article list from the articles.json cache, falling back to the
// Zenn API (and refreshing the cache) when the cache cannot be read, and then
// writes the list to articles.csv. It returns the first error encountered.
func run() error {
	var articles Articles
	if err := readJsonFromFile("articles.json", &articles); err != nil {
		// Any read failure is treated as a cache miss.
		fetched, err := getZennArticles()
		if err != nil {
			return err
		}
		if err := saveJsonToFile("articles.json", fetched); err != nil {
			return err
		}
		articles = fetched
	}

	return saveCSVToFile("articles.csv", articlesRecords(articles))
}

// getZennArticles requests page 1 of the daily-ordered articles for the "go"
// topic from pHost and decodes the JSON response body into Articles.
//
// It returns a zero Articles and an error when the request fails or exceeds
// the timeout, when the response status is not 200 OK, or when the body cannot
// be decoded. Each error is logged here before it is returned.
func getZennArticles() (Articles, error) {
	// http.Get goes through http.DefaultClient, which has no timeout. Bound the
	// whole exchange so a stalled server cannot hang the program.
	client := &http.Client{Timeout: 10 * time.Second}

	res, err := client.Get(pHost + "/api/articles?topicname=go&order=daily&page=1")
	if err != nil {
		logger.Error("error", zap.Error(err))
		return Articles{}, err
	}
	defer res.Body.Close()

	// An error response may still carry a JSON body that decodes without
	// error, so reject anything other than 200 before decoding.
	if res.StatusCode != http.StatusOK {
		err := fmt.Errorf("unexpected status %q from articles API", res.Status)
		logger.Error("error", zap.Error(err))
		return Articles{}, err
	}

	var articles Articles
	if err := json.NewDecoder(res.Body).Decode(&articles); err != nil {
		logger.Error("error", zap.Error(err))
		return Articles{}, err
	}
	logger.Info("get articles", zap.Int("count", len(articles.Articles)))
	return articles, nil
}

// saveJsonToFile creates (or truncates) fileName and writes v to it as JSON.
//
// It returns the error from creating the file, from encoding v, or from
// closing the file. Each error is logged here before it is returned.
func saveJsonToFile(fileName string, v any) error {
	f, err := os.Create(fileName)
	if err != nil {
		logger.Error("error", zap.Error(err))
		return err
	}

	if err := json.NewEncoder(f).Encode(v); err != nil {
		// The encode error is the one worth reporting; the close is only to
		// release the descriptor.
		_ = f.Close()
		logger.Error("error", zap.Error(err))
		return err
	}

	// Close is checked explicitly because a failure here means the data may
	// not have reached the file.
	if err := f.Close(); err != nil {
		logger.Error("error", zap.Error(err))
		return err
	}

	logger.Info("save json to file", zap.String("file", fileName))
	return nil
}

// readJsonFromFile opens fileName and decodes its JSON content into v, which
// must be a pointer accepted by encoding/json.
//
// It returns the error from opening the file (for a missing file this
// satisfies os.IsNotExist) or from decoding. A missing file is logged at info
// level as "not found json file"; every other failure is logged as an error.
func readJsonFromFile(fileName string, v any) error {
	f, err := os.Open(fileName)
	if err != nil {
		if os.IsNotExist(err) {
			logger.Info("not found json file")
		} else {
			logger.Error("error", zap.Error(err))
		}
		return err
	}
	// Registered right after a successful open so the file is released on
	// every return path, including decode failures.
	defer f.Close()

	if err := json.NewDecoder(f).Decode(v); err != nil {
		logger.Error("error", zap.Error(err))
		return err
	}

	logger.Info("read json from file", zap.String("file", fileName))
	return nil
}

// articlesRecords flattens the article list into CSV rows. The first row is
// the fixed header; each following row holds one article's title, author
// username, like count rendered as decimal text, and its path prefixed with
// pHost.
func articlesRecords(articles Articles) [][]string {
	header := []string{"title", "user_name", "like_count", "path"}

	rows := make([][]string, 0, len(articles.Articles)+1)
	rows = append(rows, header)

	for i := range articles.Articles {
		a := &articles.Articles[i]
		likes := fmt.Sprint(a.LikedCount)
		url := pHost + a.Path
		rows = append(rows, []string{a.Title, a.User.Username, likes, url})
	}
	return rows
}

// saveCSVToFile appends articlesRecords to fileName as CSV, creating the file
// with mode 0644 when it does not exist.
//
// It returns the error from opening the file, from writing/flushing the
// records, or from closing the file. Each error is logged here before it is
// returned.
//
// NOTE(review): the file is opened with O_APPEND, so running the program again
// appends the same rows (header included) after the existing content. Confirm
// whether appending is intended or whether the file should be truncated.
func saveCSVToFile(fileName string, articlesRecords [][]string) error {
	f, err := os.OpenFile(fileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		logger.Error("error", zap.Error(err))
		return err
	}

	// WriteAll flushes the writer and returns any flush error, so no separate
	// Flush call is needed.
	if err := csv.NewWriter(f).WriteAll(articlesRecords); err != nil {
		// The write error is the one worth reporting; the close is only to
		// release the descriptor.
		_ = f.Close()
		logger.Error("error", zap.Error(err))
		return err
	}

	// Close is checked explicitly because a failure here means the rows may
	// not have reached the file.
	if err := f.Close(); err != nil {
		logger.Error("error", zap.Error(err))
		return err
	}

	logger.Info("save csv to file", zap.String("file", fileName))
	return nil
}

Discussion