How to parse hierarchical revenue segments? #675
-
|
I was able to get the revenue segments from the XBRL facts query. My code is like below:

```python
def get_xbrl_data(ticker: str, filing: Filing):
    if filing.period_end_date is None:
        raise ValueError("period_end_date is none")
    company = Company(ticker)
    filings = company.get_filings(
        form=filing.form,
        filing_date=filing.filing_date,
        amendments=False,
        date=filing.period_end_date,
        accession_number=filing.accession_number,
    )
    if filings.empty:
        raise ValueError(f"No filings found for {ticker}")
    filing_obj = filings.get_filing_at(0)
    xb = filing_obj.xbrl()
    if xb is None:
        raise ValueError("xbrl is empty")
    result = (
        xb.query(
            include_contexts=True,
            include_dimensions=True,
            include_element_info=True,
        )
        .by_statement_type("IncomeStatement")
        .by_date_range(
            start_date=calculate_start_date(
                filing.period_end_date,
                filing.form,
            ),
            end_date=filing.period_end_date,
        )
        .by_custom(concept_filter(included_keys=REVENUE_CONCEPTS_PRIORITY))
        .sort_by(
            column="numeric_value",
            ascending=False,
        )
    )
    df = result.to_dataframe().drop_duplicates()
    df.to_csv(f"{ticker}.csv")
```
it outputs the revenues like below:
```text
dimension label numeric_value
0 NaN Revenues 2.809500e+10
1 srt:ProductOrServiceAxis Total revenues from sales and services 2.753200e+10
2 srt:ProductOrServiceAxis Automotive Revenues 2.120500e+10
3 srt:ProductOrServiceAxis Automotive sales 2.035900e+10
4 srt:ProductOrServiceAxis Services and other 3.475000e+09
5 srt:ProductOrServiceAxis Energy generation and storage 3.415000e+09
6 srt:ProductOrServiceAxis Energy generation and storage sales 3.281000e+09
7 srt:ProductOrServiceAxis Automotive leasing 4.290000e+08
8 srt:ProductOrServiceAxis Automotive regulatory credits 4.170000e+08
9 srt:ProductOrServiceAxis Energy generation and storage leasing 1.340000e+08
```

I want to generate the output with a parent-child relation between the segments, something like the example below. In the documentation I saw some output like that. How is it possible to build such a hierarchy? |
Beta Was this translation helpful? Give feedback.
Replies: 2 comments 5 replies
-
|
Great question! The key insight is that the Statement object preserves the hierarchy automatically.

### Quick Solution

```python
from edgar import Company

company = Company("TSLA")
filing = company.get_filings(form="10-K").latest()
xbrl = filing.xbrl()
# Use the Statement object — hierarchy is preserved automatically
income = xbrl.statements.income_statement()
df = income.to_dataframe()
```

The DataFrame includes these hierarchy columns (as used in the snippets that follow): `level`, `abstract`, `concept`, and `parent_concept`.
### Build the Tree

```python
date_cols = [c for c in df.columns if c.startswith("20")]
latest = date_cols[0] if date_cols else None
for _, row in df.iterrows():
    indent = " " * int(row["level"])
    label = row["label"]
    if row["abstract"]:
        print(f"{indent}{label}:")
    elif latest:
        value = row[latest]
        if value and value == value: # not NaN
            print(f"{indent}{label:50s} ${value:>15,.0f}")
```

### Find Revenue Children

Use the `parent_concept` column to select the rows that sit under a given concept:

```python
# Find the total revenue concept
revenue_rows = df[df["label"].str.contains("revenue", case=False, na=False) & ~df["abstract"]]
total_concept = revenue_rows.iloc[-1]["concept"]
# Get all children
children = df[df["parent_concept"] == total_concept]
print(children[["label", latest]].to_string(index=False))
```

For dimensional breakdowns (segment/geography data), use the detailed view:

```python
df_detailed = income.to_dataframe(view="detailed")
```

### Documentation & Tutorial

We just added docs covering this exact use case:
Thanks for raising this — it helped us identify a documentation gap! |
Beta Was this translation helpful? Give feedback.
-
|
Great question — you've found a real gap in edgartools. Here's what's happening and a workaround.

Why the segments come out flat

When edgartools builds dimensional rows (segment members) for a statement, it puts them all at the same nesting level. The XBRL definition linkbase does define a member hierarchy — "Automotive sales" is a child of "Automotive Revenues" — but edgartools doesn't use that hierarchy when building the DataFrame. We have an issue open to fix this properly. In the meantime, here's a workaround.

Workaround: value-math tree builder

SEC filings must balance — a parent's value always equals the sum of its children. We can use that to reconstruct the tree:

from edgar import Company
from itertools import combinations
def revenue_segments(income_statement, period=None):
    """Build a revenue segment tree from an income statement.

    The tree is reconstructed purely from the reported values: an item is
    treated as a parent when a set of strictly smaller, positive items sums
    to its value (within a relative tolerance of 1e-6 of the largest value).

    Args:
        income_statement: Object exposing ``to_dataframe()``. The returned
            frame is read through its ``concept``, ``label``, ``abstract``
            and ``dimension`` columns plus one value column per period.
        period: Name of the value column to read. When omitted (or falsy),
            the first column whose name starts with ``'20'`` is used.

    Returns:
        A nested dict ``{'label': ..., 'value': ..., 'children': [...]}``
        rooted at the total row; each child has the same shape and children
        are ordered by descending value.

    Raises:
        IndexError: If no period column can be found while ``period`` is
            omitted, or if the frame has no non-abstract, non-dimensional
            row to use as the total.
    """
    df = income_statement.to_dataframe()
    # Period columns are recognised by name; non-string column labels are
    # skipped so they cannot break the prefix test.
    date_cols = [c for c in df.columns if isinstance(c, str) and c.startswith('20')]
    period = period or date_cols[0]

    # The first concrete (non-abstract, non-dimensional) row is the total.
    total = df[~df['abstract'] & ~df['dimension']].iloc[0]
    # Dimensional rows sharing the total's concept are its segment members.
    segments = df[(df['concept'] == total['concept']) & df['dimension'] & df[period].notna()]

    # Index 0 is always the total; segment rows follow in frame order.
    items = [(total['label'], float(total[period]))]
    items += [(r['label'], float(r[period])) for _, r in segments.iterrows()]
    tol = max(abs(v) for _, v in items) * 1e-6

    # decomps[i] collects every set of 2 to 7 smaller positive items whose
    # values add up to item i.
    decomps = {}
    for i, (_, vi) in enumerate(items):
        if vi <= 0:
            continue
        cands = [(j, vj) for j, (_, vj) in enumerate(items) if j != i and 0 < vj < vi]
        for size in range(2, min(len(cands) + 1, 8)):
            for combo in combinations(cands, size):
                if abs(sum(v for _, v in combo) - vi) < tol:
                    decomps.setdefault(i, []).append(frozenset(j for j, _ in combo))

    children = {}
    assigned = set()

    def assign(parent):
        # Take the smallest decomposition that does not reuse an item that
        # already has a parent, then recurse into the chosen children.
        for child_set in sorted(decomps.get(parent, []), key=len):
            if not child_set & assigned:
                children[parent] = sorted(child_set, key=lambda j: items[j][1], reverse=True)
                assigned.update(child_set)
                for c in children[parent]:
                    assign(c)
                return

    assign(0)

    def build(i):
        label, val = items[i]
        return {'label': label, 'value': val,
                'children': [build(c) for c in children.get(i, [])]}

    return build(0)
# Usage
company = Company("TSLA")
filing = company.get_filings(form="10-K").latest()
income = filing.xbrl().statements.income_statement()
tree = revenue_segments(income)

Tesla output: Apple output: The function returns a nested dict with `label`, `value`, and `children` keys. We'll fix edgartools to surface this hierarchy natively from the definition linkbase so you won't need the workaround. |
Beta Was this translation helpful? Give feedback.
Better late than never,
and the following is an example file for LLY:
LLY_revenue_segments.csv
Workflow is something like below