From 1f63795e49ab8a9a97189ead44df5d4e3520e09a Mon Sep 17 00:00:00 2001
From: Aroy-Art <Aroy-Art@pm.me>
Date: Fri, 12 Jul 2024 21:48:47 +0200
Subject: [PATCH] Add: basic scraper that gets all URLs from a Wikipedia
 article

---
 web-scraper/basic-scraper.go | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 web-scraper/basic-scraper.go

diff --git a/web-scraper/basic-scraper.go b/web-scraper/basic-scraper.go
new file mode 100644
index 0000000..7e7ef4c
--- /dev/null
+++ b/web-scraper/basic-scraper.go
@@ -0,0 +1,20 @@
+package main
+
+import (
+    "fmt"
+    "log"
+
+    "github.com/gocolly/colly"
+)
+
+// main prints the href of every link in Wikipedia's "Web scraping" article.
+func main() {
+    c := colly.NewCollector(colly.AllowedDomains("en.wikipedia.org"))
+
+    // Print the raw href of each <a> under .mw-parser-output, one slice per match.
+    c.OnHTML(".mw-parser-output", func(e *colly.HTMLElement) {
+        fmt.Println(e.ChildAttrs("a", "href"))
+    })
+    if err := c.Visit("https://en.wikipedia.org/wiki/Web_scraping"); err != nil {
+        log.Fatalf("visiting article: %v", err)
+    }
+}