Denna Jupyter Notebook
Skapa sökfråga --> 245204 Motioner
Tanken är att skapa Python-kod som kollar, för alla Riksdagsdokument i Wikidata, om det finns en lagstiftande församling hos Riksdagen
from datetime import datetime
# Remember when this run started; the final cell subtracts it from the
# current time to report the total run time.
start_time = datetime.now()
print("Last run: ", start_time)
Last run: 2022-10-02 19:36:35.977986
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
from tqdm.auto import tqdm
tqdm.pandas()
import urllib3, json
import pandas as pd
# SPARQL endpoint that the query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"
# The query returns two columns per matching item:
#   ?WD  - the item URI with everything up to the last "Q" stripped (the Q-id)
#   ?api - a https://data.riksdagen.se/dokument/<rid>.json URL built from the
#          item's P8433 value
# Items must be an instance of Q452237 or of one of its subclasses, and any
# item that already has a P7727 statement is removed by the MINUS clause.
# NOTE(review): ?instance and the label SERVICE are never used in the SELECT
# list; the extra `wdt:P31 ?instance` pattern looks like it could duplicate
# rows for items with several P31 values - confirm before relying on counts.
queryMotioner = """#title: Missing lagstiftande
SELECT (Replace(str(?doc),".*Q", "Q") AS ?WD) ?api
WHERE
{
?doc wdt:P8433 ?rid.
?doc wdt:P31 ?instance.
?doc wdt:P31/wdt:P279* wd:Q452237.
BIND(URI(CONCAT("https://data.riksdagen.se/dokument/",?rid,".json")) AS ?api)
minus{?doc wdt:P7727 ?miss}
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
"""
def get_sparql_dataframe(endpoint_url, query):
    """Run a SPARQL query and return its result bindings as a DataFrame.

    Args:
        endpoint_url: URL of the SPARQL endpoint to send the query to.
        query: SPARQL query text.

    Returns:
        A pandas DataFrame with one column per variable listed under
        ``head.vars`` in the JSON response and one row per binding. A
        variable that is missing from a binding becomes ``None``.
    """
    # The user agent carries the running Python major/minor version.
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(endpoint_url, agent="salgo60/%s.%s" % (major, minor))
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # Decode the raw HTTP response body as JSON.
    payload = json.load(client.query().response)
    columns = payload['head']['vars']

    # Each binding maps a variable name to a dict holding its 'value'.
    records = [
        [binding.get(name, {}).get('value') for name in columns]
        for binding in payload['results']['bindings']
    ]
    return pd.DataFrame(records, columns=columns)
# Run the query against the endpoint; the resulting frame has the columns
# selected by the query (WD and api).
WDMotioner = get_sparql_dataframe(endpoint_url, queryMotioner)
# Print the frame's row count, columns and dtypes.
WDMotioner.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 47802 entries, 0 to 47801 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 WD 47802 non-null object 1 api 47802 non-null object dtypes: object(2) memory usage: 747.0+ KB
# Download the JSON document behind every `api` URL and collect the value of
# dokumentstatus -> dokument -> organ together with the item's Q-id.
http = urllib3.PoolManager()
listP7277 = []
# Iterate the two columns directly; iterrows() would build a Series per row
# only to read these two fields.
for wd_id, url in zip(WDMotioner["WD"], WDMotioner["api"]):
    try:
        r = http.request('GET', url)
        data = json.loads(r.data)
        organ = data["dokumentstatus"]["dokument"]["organ"]
    except Exception as e:
        # Deliberately best effort: some URLs return a body that is not
        # JSON, and one bad document must not abort the whole (long) run.
        # The failure is reported and the item is skipped.
        print("\tError in ",url, e)
    else:
        listP7277.append([wd_id, organ])
# Passing the column names to the constructor also works when no row was
# collected; assigning `.columns` afterwards raises on an empty frame.
dftot = pd.DataFrame(listP7277, columns=['WD', 'lagstiftande'])
Error in https://data.riksdagen.se/dokument/FY02957.json Expecting value: line 1 column 1 (char 0) Error in https://data.riksdagen.se/dokument/H9024373-.json Expecting value: line 1 column 1 (char 0) Error in https://data.riksdagen.se/dokument/H9024355-.json Expecting value: line 1 column 1 (char 0) Error in https://data.riksdagen.se/dokument/H9023779-.json Expecting value: line 1 column 1 (char 0) Error in https://data.riksdagen.se/dokument/GS02xv282-.json Expecting value: line 1 column 1 (char 0)
# Bare expression: in a notebook cell this renders the frame as the output.
dftot
WD | lagstiftande | |
---|---|---|
0 | Q98001273 | LU |
1 | Q98001277 | |
2 | Q98001287 | |
3 | Q98001289 | JoU |
4 | Q98001295 | BoU |
... | ... | ... |
47792 | Q111682080 | LU |
47793 | Q111682081 | LU |
47794 | Q111682086 | LU |
47795 | Q111682087 | LU |
47796 | Q111682084 | LU |
47797 rows × 2 columns
# Count how often each distinct value occurs in the column. Counting is
# case-sensitive, so e.g. "sku", "SkU" and "SKU" are reported separately.
dftot["lagstiftande"].value_counts()
32985 JoU 4116 BoU 2355 LU 2066 SoU 538 TU 537 MJU 464 JuU 401 sku 377 SkU 370 UbU 359 CU 305 NU 299 ubu 285 KU 274 SfU 228 sfu 214 tu 192 UU 178 juu 174 nu 172 KrU 166 AU 136 FiU 133 kru 131 uu 118 lu 114 FöU 95 au 9 U 2 kamm 1 MU 1 MjU 1 SKU 1 Name: lagstiftande, dtype: int64
# Keep only the rows whose `lagstiftande` value is not the empty string.
dfWD = dftot[dftot["lagstiftande"] != ""]
# Print the filtered frame's row count, columns and dtypes.
dfWD.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 14812 entries, 0 to 47796 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 WD 14812 non-null object 1 lagstiftande 14812 non-null object dtypes: object(2) memory usage: 347.2+ KB
# Write the filtered rows to a CSV file in the working directory.
dfWD.to_csv("Lagstiftande.csv")
# Take the end timestamp once and use it for both messages, so the reported
# end time and the elapsed time describe the same instant.
end = datetime.now()
print("Ended: ", end)
print('Time elapsed (hh:mm:ss.ms) {}'.format(end - start_time))
Ended: 2022-10-02 20:20:21.565499 Time elapsed (hh:mm:ss.ms) 0:43:45.588685
# GE02Bo518
#dftot[dftot["id"] == "GE02Bo518"]