How to use Google Custom Search API in Python
Google Custom Search Engine set-up
Google Custom Search Engine (CSE) lets you search either a set of specific websites or the entire web. Using CSE requires a Google account. To set up CSE, head to https://programmablesearchengine.google.com/about/, create your search engine, retrieve your Search engine ID and API key, and enable searching the entire web in the control panel.
Here is the Python code that searches the web for each Norwegian voluntary organization together with the keyword "klima" (climate):
import os

import pandas as pd
import requests
# Credentials for the Custom Search JSON API.
# NOTE(security): secrets should not live in source code. Both values are read
# from the environment first; the literals are kept only as a
# backward-compatible fallback, and a key that has been published like this
# should be revoked and replaced.
# get the API KEY here: https://developers.google.com/custom-search/v1/overview
API_KEY = os.environ.get("GOOGLE_API_KEY", "AIzaSyCUfoQzKccMlSGJUdiOCop8MGtP9FTud8k")
# get your Search Engine ID on your CSE control panel
SEARCH_ENGINE_ID = os.environ.get("GOOGLE_CSE_ID", "61659eec6a73640b0")
# Names of the organizations to search for; one search query is issued per entry.
orga = [
    "Røde kors",
    "norsk folkehjelp",
    "Norske redningshunder",
    "speidernes beredskapsgruppe",
    "Norske alpine redningsgrupper",
    "Frivillige Organisasjoners Redningsfaglige Forum",
    "Naturvernforbundet",
    "WWF",
    "Natur og Ungdom",
    "Bellona",
    "Greenpeace",
    "Miljøstiftelsen Zero",
]
# Endpoint of the Custom Search JSON API.
# doc: https://developers.google.com/custom-search/v1/using_rest
_SEARCH_URL = "https://www.googleapis.com/customsearch/v1"
# Placeholder stored wherever a result lacks a field.
_MISSING = "N/A"
# Result fields copied as-is from every returned item.
_RESULT_FIELDS = ["title", "snippet", "htmlSnippet", "link"]

# Result page requested for every query.
# NOTE(review): the earlier comment said "first page" while the code asked for
# page 2; the code's value is kept here - confirm which one was intended.
page = 2
# calculating start, (page=2) => (start=11), (page=3) => (start=21)
start = (page - 1) * 10 + 1


def _og_description(item):
    """Return the ``og:description`` of a result's first metatag entry.

    The value is looked up at ``item["pagemap"]["metatags"][0]``. When any
    part of that path is absent, the ``"N/A"`` placeholder is returned
    instead of raising.
    """
    pagemap = item.get("pagemap") or {}
    metatags = pagemap.get("metatags") or [{}]
    first = metatags[0] if isinstance(metatags[0], dict) else {}
    return first.get("og:description", _MISSING)


# One table per organization; they are combined after the loop.
dfs = []
for org in orga:
    # the search query you want (organization name and keyword, space-separated)
    query = f"{org} klima"

    # make the API request; passing `params` lets requests URL-encode the
    # spaces and non-ASCII letters in the query
    try:
        response = requests.get(
            _SEARCH_URL,
            params={
                "key": API_KEY,
                "cx": SEARCH_ENGINE_ID,
                "lr": "lang_no",
                "q": query,
                "start": start,
            },
            timeout=30,
        )
        data = response.json()
    except (requests.RequestException, ValueError) as err:
        # Best effort: one failed request must not discard the other results.
        print(f"Skipping {query!r}: request failed ({err})")
        continue

    # A response without "items" means no hits or an API error (e.g. quota).
    # Skip it rather than reusing the previous organization's table.
    items = data.get("items")
    if not items:
        reason = data.get("error", "no results")
        print(f"Skipping {query!r}: {reason}")
        continue

    # Build the rows straight from the JSON so that every result always has
    # the same five fields, whatever metadata the page happens to expose.
    rows = []
    for item in items:
        row = {field: item.get(field, _MISSING) for field in _RESULT_FIELDS}
        row["og:description"] = _og_description(item)
        rows.append(row)

    df = pd.DataFrame(rows, columns=_RESULT_FIELDS + ["og:description"])
    df = df.set_index("title")
    dfs.append(df)
## kind ... cse_image
## 0 customsearch#result ... [{'src': 'https://docplayer.me/thumbs/112/2039...
##
## [1 rows x 13 columns]
## kind ... 0
## 0 customsearch#result ... {'og:image': 'https://docplayer.me/thumbs/112/...
##
## [1 rows x 14 columns]
## kind ... og:description
## 0 customsearch#result ... RØDE KORS Avis fra Røde Kors Aust-Agder og Rød...
##
## [1 rows x 20 columns]
## kind ... cse_image
## 0 customsearch#result ... [{'src': 'https://images-global.nhst.tech/imag...
## 1 customsearch#result ... [{'src': 'https://legacy.altinget.no/images/ar...
## 2 customsearch#result ... NaN
## 3 customsearch#result ... NaN
## 4 customsearch#result ... [{'src': 'https://legacy.altinget.no/images/ar...
## 5 customsearch#result ... NaN
## 6 customsearch#result ... [{'src': 'x-raw-image:///df8a91bcdb0da2572749c...
## 7 customsearch#result ... [{'src': 'http://static1.squarespace.com/stati...
## 8 customsearch#result ... [{'src': 'https://www.kirken.no/globalassets/k...
## 9 customsearch#result ... [{'src': 'https://www.dagsavisen.no/resizer/ZN...
##
## [10 rows x 15 columns]
## kind ... 0
## 0 customsearch#result ... {'og:image': 'https://images-global.nhst.tech/...
## 1 customsearch#result ... {'theme-color': '#ffffff', 'og:locale:alternat...
## 2 customsearch#result ... {'theme-color': '#ffffff', 'og:locale:alternat...
## 3 customsearch#result ... {'theme-color': '#ffffff', 'og:locale:alternat...
## 4 customsearch#result ... {'og:image': 'https://legacy.altinget.no/image...
## 5 customsearch#result ... {'theme-color': '#ffffff', 'og:locale:alternat...
## 6 customsearch#result ... {'moddate': 'D:20200126034219-08'00'', 'creato...
## 7 customsearch#result ... {'og:image': 'http://static1.squarespace.com/s...
## 8 customsearch#result ... {'og:image': '/globalassets/kirken.no/aktuelt/...
## 9 customsearch#result ... {'og:image': 'https://www.dagsavisen.no/resize...
##
## [10 rows x 16 columns]
## kind ... referrer
## 0 customsearch#result ... NaN
## 1 customsearch#result ... NaN
## 2 customsearch#result ... NaN
## 3 customsearch#result ... NaN
## 4 customsearch#result ... NaN
## 5 customsearch#result ... NaN
## 6 customsearch#result ... NaN
## 7 customsearch#result ... NaN
## 8 customsearch#result ... NaN
## 9 customsearch#result ... no-referrer-when-downgrade
##
## [10 rows x 57 columns]
## kind ... newsarticle
## 0 customsearch#result ... NaN
## 1 customsearch#result ... NaN
## 2 customsearch#result ... NaN
## 3 customsearch#result ... NaN
## 4 customsearch#result ... NaN
## 5 customsearch#result ... NaN
## 6 customsearch#result ... NaN
## 7 customsearch#result ... [{'https://www.adplogger.no/json-schema/meta-a...
## 8 customsearch#result ... NaN
## 9 customsearch#result ... NaN
##
## [10 rows x 22 columns]
## kind ... 0
## 0 customsearch#result ... {'moddate': 'D:20210105104420+01'00'', 'creati...
## 1 customsearch#result ... {'og:type': 'website', 'og:site_name': 'NSR', ...
## 2 customsearch#result ... {'og:image': 'https://miljoagentene.no/getfile...
## 3 customsearch#result ... {'msapplication-config': '/cnp-assets/favicon-...
## 4 customsearch#result ... {'p:domain_verify': 'ec4318a079405ca7cd9055634...
## 5 customsearch#result ... {'og:image': 'https://www.vl.no/resizer/ssgDfY...
## 6 customsearch#result ... {'og:image': 'https://www.cdn.tv2.no/images/14...
## 7 customsearch#result ... {'apple-itunes-app': 'app-id=1342833164', 'og:...
## 8 customsearch#result ... {'og:image': 'https://www.vl.no/resizer/YnDPLD...
## 9 customsearch#result ... {'msapplication-tilecolor': '#2d505a', 'og:ima...
##
## [10 rows x 23 columns]
## kind ... google-play-app
## 0 customsearch#result ... NaN
## 1 customsearch#result ... NaN
## 2 customsearch#result ... NaN
## 3 customsearch#result ... NaN
## 4 customsearch#result ... NaN
## 5 customsearch#result ... NaN
## 6 customsearch#result ... NaN
## 7 customsearch#result ... app-id=no.nyhetsvarsel.kv
## 8 customsearch#result ... NaN
## 9 customsearch#result ... NaN
##
## [10 rows x 97 columns]
## kind ... cse_image
## 0 customsearch#result ... [{'src': 'https://gfx.nrk.no/0xl8kcCWTVBUozF9B...
##
## [1 rows x 14 columns]
## kind ... 0
## 0 customsearch#result ... {'p:domain_verify': 'ec4318a079405ca7cd9055634...
##
## [1 rows x 15 columns]
## kind ... og:url
## 0 customsearch#result ... https://www.nrk.no/norge/klimaaktivist_-_-eg-t...
##
## [1 rows x 41 columns]
## kind ... listitem
## 0 customsearch#result ... [{'position': '1'}]
##
## [1 rows x 19 columns]
## kind ... 0
## 0 customsearch#result ... {'og:image': 'https://framtida.no/wp-content/u...
##
## [1 rows x 20 columns]
## kind ... og:url
## 0 customsearch#result ... https://framtida.no/2021/12/01/julegavetips-ti...
##
## [1 rows x 37 columns]
## kind ... cse_image
## 0 customsearch#result ... [{'src': 'https://images.squarespace-cdn.com/c...
## 1 customsearch#result ... [{'src': 'https://energiogklima.no/_gatsby/ima...
## 2 customsearch#result ... [{'src': 'http://static1.squarespace.com/stati...
## 3 customsearch#result ... [{'src': 'x-raw-image:///ba76d42176a4b093acb0a...
## 4 customsearch#result ... NaN
## 5 customsearch#result ... [{'src': 'x-raw-image:///70537314466067df405f9...
## 6 customsearch#result ... [{'src': 'x-raw-image:///836b8089add529db176a8...
## 7 customsearch#result ... [{'src': 'https://eneasnett.no/wp-content/uplo...
## 8 customsearch#result ... [{'src': 'https://eneasnett.no/wp-content/uplo...
## 9 customsearch#result ... [{'src': 'https://eneasnet.se/wp-content/uploa...
##
## [10 rows x 15 columns]
## kind ... 0
## 0 customsearch#result ... {'og:type': 'website', 'twitter:title': 'Strea...
## 1 customsearch#result ... {'image': 'https://energiogklima.no/_gatsby/im...
## 2 customsearch#result ... {'og:image': 'http://static1.squarespace.com/s...
## 3 customsearch#result ... {'moddate': 'D:20201222092819Z00'00'', 'creato...
## 4 customsearch#result ... {'og:type': 'website', 'twitter:title': 'About...
## 5 customsearch#result ... {'moddate': 'D:20110930141719+02'00'', 'creati...
## 6 customsearch#result ... {'moddate': 'D:20180619070532+02'00'', 'creati...
## 7 customsearch#result ... {'og:type': 'object', 'twitter:card': 'summary...
## 8 customsearch#result ... {'og:type': 'website', 'twitter:card': 'summar...
## 9 customsearch#result ... {'og:type': 'website', 'twitter:card': 'summar...
##
## [10 rows x 16 columns]
## kind ... og:locale
## 0 customsearch#result ... NaN
## 1 customsearch#result ... NaN
## 2 customsearch#result ... NaN
## 3 customsearch#result ... NaN
## 4 customsearch#result ... NaN
## 5 customsearch#result ... NaN
## 6 customsearch#result ... NaN
## 7 customsearch#result ... en_US
## 8 customsearch#result ... en_US
## 9 customsearch#result ... sv_SE
##
## [10 rows x 40 columns]
# Combine the per-organization tables and save them as one CSV file.
if dfs:
    df2 = pd.concat(dfs)
else:
    # pd.concat raises ValueError on an empty list; write a header-only file
    # with the same layout (indexed by title) instead.
    df2 = pd.DataFrame(
        columns=["snippet", "htmlSnippet", "link", "og:description"],
        index=pd.Index([], name="title"),
    )
df2.to_csv("klima-org.csv")