-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsec_direct_edgar_api_download.py
More file actions
87 lines (67 loc) · 3.2 KB
/
sec_direct_edgar_api_download.py
File metadata and controls
87 lines (67 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import requests
import re
import time
from bs4 import BeautifulSoup
import os
def get_10k_filings(ticker, num_filings=10):
    """
    Download 10-K filings for a given ticker symbol from SEC EDGAR.

    The ticker is resolved to a 10-digit CIK through the EDGAR company
    browse page, the company's submissions JSON is fetched from
    data.sec.gov, and the primary document of each form "10-K" entry in the
    ``filings.recent`` section is saved as
    ``10K_downloads/<ticker>/10K_<filingDate>_<accession>.txt``. Filings are
    taken in the order the submissions JSON lists them.

    Args:
        ticker: Ticker symbol passed to the EDGAR company lookup.
        num_filings: Maximum number of 10-K documents to download.

    Returns:
        List of the file paths that were written. The list is empty when
        ``num_filings`` is not positive, when no CIK can be found, or when
        nothing could be downloaded. If an error interrupts the run, the
        paths saved before the error are still returned.

    Errors are reported with ``print`` and never propagate to the caller.
    """
    if num_filings <= 0:
        # Nothing was requested, so skip the network round trips entirely.
        return []

    # Use one identity for every request. The SEC asks automated clients to
    # send a descriptive User-Agent with contact details, so the placeholder
    # "Your Name email@example.com" is not used for any call.
    # No explicit Host header: requests derives it from each URL, and a fixed
    # value would be wrong for one of the two hosts contacted below.
    request_headers = {
        'User-Agent': 'FUNAIBUDDY spreadhappinesstoall062@gmail.com',
        'Accept-Encoding': 'gzip, deflate',
    }
    # Seconds. Without a timeout, requests can wait forever on a stalled
    # connection.
    request_timeout = 30

    downloaded_files = []

    try:
        # SEC EDGAR company lookup URL
        cik_lookup_url = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={ticker}"
        response = requests.get(
            cik_lookup_url, headers=request_headers, timeout=request_timeout
        )
        response.raise_for_status()

        # Extract the zero-padded 10-digit CIK from the returned page
        cik_match = re.search(r'CIK=(\d{10})', response.text)
        if not cik_match:
            print(f"Could not find CIK for {ticker}")
            return []
        cik = cik_match.group(1)
        print(f"Found CIK for {ticker}: {cik}")

        # The submissions endpoint is addressed with the zero-padded CIK
        filings_url = f"https://data.sec.gov/submissions/CIK{cik}.json"
        filings_response = requests.get(
            filings_url, headers=request_headers, timeout=request_timeout
        )
        filings_response.raise_for_status()
        data = filings_response.json()

        # "recent" holds parallel lists (one entry per filing). Missing keys
        # become empty lists so an unexpected payload yields no downloads
        # instead of a KeyError.
        recent_filings = data.get('filings', {}).get('recent', {})
        filing_rows = zip(
            recent_filings.get('form', []),
            recent_filings.get('accessionNumber', []),
            recent_filings.get('filingDate', []),
            recent_filings.get('primaryDocument', []),
        )

        # Create directory for downloads
        download_dir = f"10K_downloads/{ticker}"
        os.makedirs(download_dir, exist_ok=True)

        # Archive URLs are built with the CIK without its zero padding.
        archive_cik = str(int(cik))

        for form, accession, filing_date, primary_document in filing_rows:
            if len(downloaded_files) >= num_filings:
                # Enough files saved; stop scanning the remaining filings.
                break
            if form != '10-K':
                continue

            accession_number = accession.replace('-', '')
            filing_url = (
                f"https://www.sec.gov/Archives/edgar/data/"
                f"{archive_cik}/{accession_number}/{primary_document}"
            )
            print(f"Downloading 10-K from {filing_date}...")

            file_response = requests.get(
                filing_url, headers=request_headers, timeout=request_timeout
            )
            if file_response.status_code == 200:
                filename = f"{download_dir}/10K_{filing_date}_{accession_number}.txt"
                with open(filename, 'w', encoding='utf-8') as f:
                    f.write(file_response.text)
                downloaded_files.append(filename)
                print(f"Saved: {filename}")
            else:
                print(f"Failed to download: {filing_url}")

            # Respect SEC's rate limiting after every request, including
            # failed ones, so errors are not retried back-to-back.
            time.sleep(0.1)
    except Exception as e:
        # Best-effort boundary: report the problem and fall through so that
        # files already written to disk are still returned.
        print(f"Error downloading filings for {ticker}: {e}")

    return downloaded_files
# Example usage: runs only when this file is executed as a script
if __name__ == "__main__":
    # Request up to five 10-K filings for ticker "PGR" and report how many
    # files were saved
    saved_paths = get_10k_filings("PGR", num_filings=5)
    saved_count = len(saved_paths)
    print(f"Downloaded {saved_count} files")