
log_processor.py

import pandas as pd
import csv
import numpy as np

def check_none_validations(data):
    # Collect (News, Perspective) pairs whose Ground Truth is neither 0 nor 1
    # (i.e. missing or otherwise invalid validations).
    none_group = {}
    for _, row in data.iterrows():
        if row['Ground Truth'] != 0 and row['Ground Truth'] != 1:
            none_group[(row['News'], row['Perspective'])] = row['Ground Truth']
    return none_group

def check_inconsistencies(data, iterations=10):
    # A perspective is "inconsistent" when its True/False counts are split
    # across the repeated runs, i.e. neither count equals the total number of
    # iterations. The smaller of the two counts measures the split
    # (e.g. with iterations=10, counts of 7 and 3 give an inconsistency of 3).
    inconsistencies = {}
    for _, row in data.iterrows():
        for perspective in ['Democrat', 'Republican', 'Neutral']:
            true_count = row[f'Count True {perspective}']
            false_count = row[f'Count False {perspective}']
            if true_count != iterations and false_count != iterations:
                inconsistency_number = min(true_count, false_count)
                if inconsistency_number != 0:
                    inconsistencies[(row['News'], perspective)] = inconsistency_number
    return inconsistencies

def save_inconsistencies_to_csv(inconsistencies, file_path):
    with open(file_path, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['News', 'Perspective', 'Inconsistency Number'])
        for (news, perspective), count in inconsistencies.items():
            writer.writerow([news, perspective, count])

def save_none_group_to_csv(none_group, file_path):
    with open(file_path, mode='w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['News', 'Perspective', 'Validations'])
        for (news, perspective), count in none_group.items():
            writer.writerow([news, perspective, count])

# Both check functions take a DataFrame, so load the CSVs before calling them:
# inconsistencies = check_inconsistencies(pd.read_csv("updated.csv"), iterations=10)
# none_group = check_none_validations(pd.read_csv("explanations.csv"))
# save_inconsistencies_to_csv(inconsistencies, "inconsistencies.csv")
# save_none_group_to_csv(none_group, "none_values.csv")
def compute_confusion_matrices(updated_file, leaning_file, iterations):
    df_updated = pd.read_csv(updated_file, delimiter=',')
    df_leaning = pd.read_csv(leaning_file, delimiter=',')
    df_updated = df_updated.merge(df_leaning[['News', 'Leaning']], on='News', how='left')
    # Restrict the analysis to right-leaning ('R') news items.
    filtered_df = df_updated[df_updated['Leaning'] == 'R']
    # Convert per-perspective True/False counts into probabilities.
    prob_1_democrat = filtered_df['Count True Democrat'] / iterations
    prob_0_democrat = filtered_df['Count False Democrat'] / iterations
    prob_1_republican = filtered_df['Count True Republican'] / iterations
    prob_0_republican = filtered_df['Count False Republican'] / iterations
    prob_1_neutral = filtered_df['Count True Neutral'] / iterations
    prob_0_neutral = filtered_df['Count False Neutral'] / iterations
    ground_truth = filtered_df['Ground Truth']

    def get_confusion_matrix(ground_truth, prob_1, prob_0):
        # Soft confusion matrix: each row contributes its predicted
        # probabilities rather than a hard 0/1 label.
        TP = np.sum(ground_truth * prob_1)
        FP = np.sum((1 - ground_truth) * prob_1)
        FN = np.sum(ground_truth * prob_0)
        TN = np.sum((1 - ground_truth) * prob_0)
        return np.array([[TP, FP], [FN, TN]])

    confusion_matrix_prob_democrat = get_confusion_matrix(ground_truth, prob_1_democrat, prob_0_democrat)
    confusion_matrix_prob_republican = get_confusion_matrix(ground_truth, prob_1_republican, prob_0_republican)
    confusion_matrix_prob_neutral = get_confusion_matrix(ground_truth, prob_1_neutral, prob_0_neutral)
    return confusion_matrix_prob_democrat, confusion_matrix_prob_republican, confusion_matrix_prob_neutral

confusion_matrix_democrat, confusion_matrix_republican, confusion_matrix_neutral = compute_confusion_matrices(
    updated_file='/LLaMa/updated1.csv',
    leaning_file='/news_leaning_dataset.csv',
    iterations=10
)
print("Confusion Matrix - Democrat:\n", confusion_matrix_democrat)
print("Confusion Matrix - Republican:\n", confusion_matrix_republican)
print("Confusion Matrix - Neutral:\n", confusion_matrix_neutral)
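# Optional sketch (not in the original file): derive summary metrics from the
# confusion matrices above. It assumes the [[TP, FP], [FN, TN]] layout returned
# by get_confusion_matrix; the helper name summarize_confusion_matrix is my own.
def summarize_confusion_matrix(cm):
    (TP, FP), (FN, TN) = cm
    total = TP + FP + FN + TN
    accuracy = (TP + TN) / total if total else float('nan')
    precision = TP / (TP + FP) if (TP + FP) else float('nan')
    recall = TP / (TP + FN) if (TP + FN) else float('nan')
    return accuracy, precision, recall

# Example usage with the matrices computed above:
# for name, cm in [('Democrat', confusion_matrix_democrat),
#                  ('Republican', confusion_matrix_republican),
#                  ('Neutral', confusion_matrix_neutral)]:
#     print(name, summarize_confusion_matrix(cm))
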
def report_performance_through_leanings():
    df = pd.read_csv('news_leaning_dataset.csv', delimiter=',')
    r001 = []
    d110 = []
    d010 = []
    r101 = []
    for _, row in df.iterrows():
        leaning = row['Leaning']
        # Column strings are kept verbatim (typos included), since they
        # presumably mirror the dataset's headers exactly.
        democrat_response = row['ChatGPT’s response from the perspective of a Conservative (Democrat) viewpoint']
        republican_response = row['ChatGPT’s response from the perspective of a Rdaical (Republican) viewpoint']
        if leaning == 'R' and row['Ground Truth'] == 0 and democrat_response == 0 and republican_response == 1:
            r001.append(row['News'])
        elif leaning == 'D' and row['Ground Truth'] == 1 and democrat_response == 1 and republican_response == 0:
            d110.append(row['News'])
        elif leaning == 'D' and row['Ground Truth'] == 0 and democrat_response == 1 and republican_response == 0:
            d010.append(row['News'])
        elif leaning == 'R' and row['Ground Truth'] == 1 and democrat_response == 0 and republican_response == 1:
            r101.append(row['News'])
    print('_______________________________________________________')
    print('Leaning: R, Ground Truth: 0, Democrat: 0, Republican: 1')
    for news in r001:
        print(news)
    print('_______________________________________________________')
    print('Leaning: D, Ground Truth: 1, Democrat: 1, Republican: 0')
    for news in d110:
        print(news)
    print('_______________________________________________________')
    print('Leaning: D, Ground Truth: 0, Democrat: 1, Republican: 0')
    for news in d010:
        print(news)
    print('_______________________________________________________')
    print('Leaning: R, Ground Truth: 1, Democrat: 0, Republican: 1')
    for news in r101:
        print(news)

# report_performance_through_leanings()

df = pd.read_csv('news_leaning_dataset.csv', delimiter=',')
iterations = 10  # not used in this block
# Count how ground-truth labels (1 / 0) are distributed across leanings.
d1 = 0  # Ground Truth 1, Leaning 'N'
r1 = 0  # Ground Truth 0, Leaning 'N'
d2 = 0  # Ground Truth 1, Leaning 'R'
r2 = 0  # Ground Truth 0, Leaning 'R'
d3 = 0  # Ground Truth 1, Leaning 'V'
r3 = 0  # Ground Truth 0, Leaning 'V'
for _, row in df.iterrows():
    if row['Ground Truth'] == 1 and row['Leaning'] == 'N':
        d1 += 1
    elif row['Ground Truth'] == 0 and row['Leaning'] == 'N':
        r1 += 1
    elif row['Ground Truth'] == 1 and row['Leaning'] == 'R':
        d2 += 1
    elif row['Ground Truth'] == 0 and row['Leaning'] == 'R':
        r2 += 1
    elif row['Ground Truth'] == 1 and row['Leaning'] == 'V':
        d3 += 1
    elif row['Ground Truth'] == 0 and row['Leaning'] == 'V':
        r3 += 1
# print(d1, r1, d2, r2, d3, r3)
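
# Optional sketch (not in the original file): the same tally via pandas.
# pd.crosstab cross-tabulates Leaning against Ground Truth in one call and
# covers every leaning value in the column, not only 'N', 'R', and 'V'.
# print(pd.crosstab(df['Leaning'], df['Ground Truth']))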