Python使用多个键对JSON对象进行分组

Python grouping JSON object using multiple keys

本文关键字:对象 JSON Python      更新时间:2023-09-26

我有这个JSON对象,其结构如下(JSON对象是使用to_json(orient="records")从pandas数据框中提取的)

# Sample input: one flat record per logged activity.
data = [
    dict(month='Jan', date='18', activity='cycling', duration=3),
    dict(month='Jan', date='18', activity='reading', duration=3.0),
    dict(month='Jan', date='19', activity='scripting', duration=19.5),
    dict(month='Feb', date='18', activity='work', duration=22.0),
    dict(month='Feb', date='19', activity='cooking', duration=0.7),
    dict(month='March', date='16', activity='hiking', duration=8.0),
]

我试图按 month 和 date 这两个字段进行分组。预期结果:

# Desired shape: one entry per month, whose "details" hold one entry per
# date, whose "effort" lists the activities logged on that date.
data = [
    {
        "month": "Jan",
        "details": [
            {
                "date": "18",
                "effort": [
                    {"activity": "cycling", "duration": 3},
                    {"activity": "reading", "duration": 3.0},
                ],
            },
            {
                "date": "19",
                "effort": [{"activity": "scripting", "duration": 19.5}],
            },
        ],
    },
    {
        "month": "Feb",
        "details": [
            {
                "date": "18",
                "effort": [{"activity": "work", "duration": 22.0}],
            },
            {
                "date": "19",
                "effort": [{"activity": "cooking", "duration": 0.7}],
            },
        ],
    },
    {
        "month": "March",
        "details": [
            {
                "date": "16",
                "effort": [{"activity": "hiking", "duration": 8.0}],
            },
        ],
    },
]

我尝试把用to_dict(orient="records")从pandas数据框中提取的数据作为Python字典来处理:

# Wrap every record in its own month -> details -> efforts nesting.
# Nothing is merged here: each input record yields a separate month entry.
list_ = []
for item in dict_:
    month_entry = {"month": item["month"], "details": []}
    day_entry = {"date": item["date"], "efforts": []}
    day_entry["efforts"].append(
        {"activity": item["activity"], "duration": item["duration"]}
    )
    month_entry["details"].append(day_entry)
    list_.append(month_entry)
json.dumps(list_)

我得到的输出是

[{
    "month": "Jan",
    "details": [{
        "date": "18",
        "efforts": [{
            "duration": 3,
            "activity": "cycling"
        }]
    }]
}, {
    "month": "Jan",
    "details": [{
        "date": "18",
        "efforts": [{
            "duration": 3.0,
            "activity": "reading"
        }]
    }]
}, {
    "month": "Jan",
    "details": [{
        "date": "19",
        "efforts": [{
            "duration": 19.5,
            "activity": "scripting"
        }]
    }]
}, {
    "month": "Feb",
    "details": [{
        "date": "18",
        "efforts": [{
            "duration": 22.0,
            "activity": "work"
        }]
    }]
}, {
    "month": "Feb",
    "details": [{
        "date": "19",
        "efforts": [{
            "duration": 0.7,
            "activity": "cooking"
        }]
    }]
}, {
    "month": "March",
    "details": [{
        "date": "16",
        "efforts": [{
            "duration": 8.0,
            "activity": "hiking"
        }]
    }]
}]

我的代码没有处理把值合并到已存在的字段(相同的月份或日期)中的情况。

我用Python和JavaScript都尝试过,你们有什么建议或解决问题的方法吗?谢谢

这似乎可以工作:

代码:
# Sample input: one flat record per logged activity.
data = [{'month': 'Jan', 'date': '18', 'activity': 'cycling', 'duration': 3},
        {'month': 'Jan', 'date': '18', 'activity': 'reading', 'duration': 3.0},
        {'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
        {'month': 'Feb', 'date': '18', 'activity': 'work', 'duration': 22.0},
        {'month': 'Feb', 'date': '19', 'activity': 'cooking', 'duration': 0.7},
        {'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0}]

# Group the records by month, then by date within each month.
# The two indexes point at the very same dict objects stored in new_data, so
# an existing month/date entry is found with one lookup and is never appended
# twice (scanning and appending to 'details' in the same loop duplicates
# efforts).
new_data = []
month_index = {}  # month -> that month's entry in new_data
date_index = {}   # (month, date) -> that date's entry in the month's 'details'
for item in data:
    month_key = item['month']
    date_key = (item['month'], item['date'])

    if month_key not in month_index:
        month_index[month_key] = {'month': item['month'], 'details': []}
        new_data.append(month_index[month_key])

    if date_key not in date_index:
        date_index[date_key] = {'date': item['date'], 'effort': []}
        month_index[month_key]['details'].append(date_index[date_key])

    date_index[date_key]['effort'].append(
        {'activity': item['activity'], 'duration': item['duration']})

# Months and dates appear in first-seen order; each activity is listed once.
print(new_data)
输出:
[{'details': [{'date': '18', 'effort': [{'duration': 3, 'activity': 'cycling'}, {'duration': 3.0, 'activity': 'reading'}]}, {'date': '19', 'effort': [{'duration': 19.5, 'activity': 'scripting'}, {'duration': 19.5, 'activity': 'scripting'}]}], 'month': 'Jan'}, {'details': [{'date': '18', 'effort': [{'duration': 22.0, 'activity': 'work'}]}, {'date': '19', 'effort': [{'duration': 0.7, 'activity': 'cooking'}, {'duration': 0.7, 'activity': 'cooking'}]}], 'month': 'Feb'}, {'details': [{'date': '16', 'effort': [{'duration': 8.0, 'activity': 'hiking'}]}], 'month': 'March'}]

基本上只是遍历每个条目,首先检查月份是否存在,如果存在,检查日期是否已经存在,并相应地附加到新数据上。如果不存在月份,就附加所有内容,如果不存在日期,就附加日期细节和新活动。如果日期也存在,那么只需添加活动

对JSON进行分组的通用函数。您必须传入要分组的字段,以及用来存放各分组结果数组的键名

def groupBy(vetor, campos, pos):
    """Recursively group a list of dicts by a sequence of fields.

    Args:
        vetor: List of dicts (rows) to group.
        campos: List of grouping specs, one per nesting level. Each spec is a
            dict with "field" (the row key to group on) and "gbkey" (the key
            under which that group's rows are stored in the output).
        pos: Index into ``campos`` of the level to apply; pass 0 to start
            from the first spec.

    Returns:
        ``vetor`` itself when ``pos`` is past the last spec. Otherwise a new
        list with one dict per distinct value of the grouping field, in dict
        iteration order (first-seen order on Python 3.7+), shaped
        ``{field: value, gbkey: rows}``. ``rows`` are copies of the matching
        input rows without the grouping field, grouped further by the
        remaining specs.

    Raises:
        KeyError: If a spec lacks "field"/"gbkey", or a row lacks the
            grouping field (including an empty row).
    """
    if pos >= len(campos):
        return vetor

    spec = campos[pos]
    field = spec["field"]
    group_key = spec["gbkey"]

    buckets = {}
    for row in vetor:
        # Read the grouping value once per row, before touching the other
        # keys, so a row can never be filed under a previous row's value.
        value = row[field]
        remainder = {k: v for k, v in row.items() if k != field}
        buckets.setdefault(value, []).append(remainder)

    # The recursion's base case returns the rows unchanged once every spec
    # has been applied, so no separate "is there another level" check is
    # needed here.
    return [
        {field: value, group_key: groupBy(rows, campos, pos + 1)}
        for value, rows in buckets.items()
    ]
    
# Sample input: one flat record per logged activity.
data = [
    {'month': 'Jan', 'date': '18', 'activity': 'cycling', 'duration': 3},
    {'month': 'Jan', 'date': '18', 'activity': 'reading', 'duration': 3.0},
    {'month': 'Jan', 'date': '19', 'activity': 'scripting', 'duration': 19.5},
    {'month': 'Feb', 'date': '18', 'activity': 'work', 'duration': 22.0},
    {'month': 'Feb', 'date': '19', 'activity': 'cooking', 'duration': 0.7},
    {'month': 'March', 'date': '16', 'activity': 'hiking', 'duration': 8.0},
]

# Outer level: group by month, children stored under 'details'.
# Inner level: group by date, children stored under 'effort'.
print(
    groupBy(
        data,
        [
            {'field': 'month', 'gbkey': 'details'},
            {'field': 'date', 'gbkey': 'effort'},
        ],
        0,
    )
)

它会生成类似下面的内容:
[
   {
      "month":"Jan",
      "details":[
         {
            "date":"18",
            "effort":[
               {
                  "activity":"cycling",
                  "duration":3
               },
               {
                  "activity":"reading",
                  "duration":3.0
               }
            ]
         },
         {
            "date":"19",
            "effort":[
               {
                  "activity":"scripting",
                  "duration":19.5
               }
            ]
         }
      ]
   },
   {
      "month":"Feb",
      "details":[
         {
            "date":"18",
            "effort":[
               {
                  "activity":"work",
                  "duration":22.0
               }
            ]
         },
         {
            "date":"19",
            "effort":[
               {
                  "activity":"cooking",
                  "duration":0.7
               }
            ]
         }
      ]
   },
   {
      "month":"March",
      "details":[
         {
            "date":"16",
            "effort":[
               {
                  "activity":"hiking",
                  "duration":8.0
               }
            ]
         }
      ]
   }
]