Details
-
Bug
-
Status: Resolved
-
Major
-
Resolution: Incomplete
-
2.4.3
-
None
Description
SELECT group_averages.* FROM group_averages NATURAL INNER JOIN ( SELECT MAX(R) AS max_R, ipi AS ipi, description AS description, symbol AS symbol, residue FROM group_averages GROUP BY ipi, description, symbol, residue ) AS all_rows_bigger_than_four WHERE all_rows_bigger_than_four.max_R >= 4.0
causes,
--------------------------------------------------------------------------- Py4JJavaError Traceback (most recent call last) /usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw) 62 try: ---> 63 return f(*a, **kw) 64 except py4j.protocol.Py4JJavaError as e: /usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name) 327 "An error occurred while calling {0}{1}{2}.\n". --> 328 format(target_id, ".", name), value) 329 else: Py4JJavaError: An error occurred while calling o21.sql. : org.apache.spark.sql.AnalysisException: Resolved attribute(s) R#661751,residue#661752 missing from ipi#660814,residue#660731,exper_set#660827,R#660730,description#660815,sequence#660817,exper#660828,symbol#660816 in operator !Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#661751, exper#660828, exper_set#660827, residue#661752]. Attribute(s) with the same name appear in the operation: R,residue. Please check if the right attribute(s) are used.;; Project [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, num_datasets#660810L, R#660811] +- Filter (max_R#661746 >= cast(4.0 as double)) +- Project [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, num_datasets#660810L, R#660811, max_R#661746] +- Join Inner, ((((ipi#660546 = ipi#661747) && (description#660547 = description#661748)) && (symbol#660548 = symbol#661749)) && (residue#660731 = residue#661752)) :- SubqueryAlias `group_averages` : +- Filter (num_datasets#660810L > cast(1 as bigint)) : +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#660731, exper_set#660559, group_description#660716, total_residues_detected#660809L], [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, count(R#660758) AS num_datasets#660810L, CASE WHEN (stddev_samp(R#660758) < (cast(0.6 as double) * avg(R#660758))) THEN avg(R#660758) ELSE CASE WHEN (min(R#660758) < cast(4 as double)) THEN min(R#660758) ELSE avg(R#660758) END END AS R#660811] : +- Project [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#660731, group_description#660716, R#660758, total_residues_detected#660809L] : +- Join Inner, (((ipi#660546 = ipi#660814) && (description#660547 = description#660815)) && (symbol#660548 = symbol#660816)) : :- SubqueryAlias `table_by_residue` : : +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#660731, exper_set#660559, exper#660560, group_description#660716], [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#660731, group_description#660716, CASE WHEN (stddev_samp(R#660730) < (cast(0.6 as double) * avg(R#660730))) THEN avg(R#660730) ELSE CASE WHEN (min(R#660730) < cast(4 as double)) THEN min(R#660730) ELSE avg(R#660730) END END AS R#660758] : : +- Join Inner, (exper#660560 = Cimage link#660715) : : :- SubqueryAlias `table_by_peptide` : : : +- Project [ipi#660546, symbol#660548, description#660547, sequence#660549, R#660730, exper#660560, exper_set#660559, residue#660731] : : : +- Sort [ipi#660546 ASC NULLS FIRST], true : : : +- Aggregate [exper#660560, ipi#660546, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3), symbol#660548, exper_set#660559, sp#660696, sequence#660549, charge#660551, description#660547], [ipi#660546, symbol#660548, description#660547, sequence#660549, avg(cast(IR#660553 as double)) AS R#660730, exper#660560, exper_set#660559, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3) AS residue#660731] : : : +- Filter (((exper_set#660559 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660549 LIKE %C*% && (cast(R2#660556 as double) > cast(0.8 as double)))) : : : +- Join Inner, (ipi#660546 = ipi#660697) : : : :- SubqueryAlias `uniprot_sequences` : : : : +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv : : : +- SubqueryAlias `joined_spectral` : : : +- Project [ipi#660546, description#660547, symbol#660548, sequence#660549, mass#660550, charge#660551, segment#660552, IR#660553, INT#660554, NP#660555, R2#660556, entry#660557, exper#660560, exper_set#660559] : : : +- Filter (NOT ipi#660546 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660553 as int) < 20))) : : : +- Join Inner, (((sequence#660549 = sequence#660591) && (exper_set#660559 = exper_set#660592)) && (exper#660560 = exper#660593)) : : : :- SubqueryAlias `raw_output_dta` : : : : +- Relation[index#660545,ipi#660546,description#660547,symbol#660548,sequence#660549,mass#660550,charge#660551,segment#660552,IR#660553,INT#660554,NP#660555,R2#660556,entry#660557,link#660558,exper_set#660559,exper#660560] parquet : : : +- SubqueryAlias `spectral_data` : : : +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L] : : : +- SubqueryAlias `clean` : : : +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593] : : : +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$ : : : +- SubqueryAlias `raw_spectral_dta` : : : +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet : : +- SubqueryAlias `group_names_separated` : : +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv : +- SubqueryAlias `occurances_table` : +- Aggregate [ipi#660814, description#660815, symbol#660816], [count(distinct residue#660731) AS total_residues_detected#660809L, ipi#660814, description#660815, symbol#660816] : +- SubqueryAlias `table_by_residue` : +- Aggregate [ipi#660814, description#660815, symbol#660816, residue#660731, exper_set#660827, exper#660828, group_description#660716], [ipi#660814, description#660815, symbol#660816, exper_set#660827, exper#660828, residue#660731, group_description#660716, CASE WHEN (stddev_samp(R#660730) < (cast(0.6 as double) * avg(R#660730))) THEN avg(R#660730) ELSE CASE WHEN (min(R#660730) < cast(4 as double)) THEN min(R#660730) ELSE avg(R#660730) END END AS R#660758] : +- Join Inner, (exper#660828 = Cimage link#660715) : :- SubqueryAlias `table_by_peptide` : : +- Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#660730, exper#660828, exper_set#660827, residue#660731] : : +- Sort [ipi#660814 ASC NULLS FIRST], true : : +- Aggregate [exper#660828, ipi#660814, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3), symbol#660816, exper_set#660827, sp#660696, sequence#660817, charge#660819, description#660815], [ipi#660814, symbol#660816, description#660815, sequence#660817, avg(cast(IR#660821 as double)) AS R#660730, exper#660828, exper_set#660827, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3) AS residue#660731] : : +- Filter (((exper_set#660827 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660817 LIKE %C*% && (cast(R2#660824 as double) > cast(0.8 as double)))) : : +- Join Inner, (ipi#660814 = ipi#660697) : : :- SubqueryAlias `uniprot_sequences` : : : +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv : : +- SubqueryAlias `joined_spectral` : : +- Project [ipi#660814, description#660815, symbol#660816, sequence#660817, mass#660818, charge#660819, segment#660820, IR#660821, INT#660822, NP#660823, R2#660824, entry#660825, exper#660828, exper_set#660827] : : +- Filter (NOT ipi#660814 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660821 as int) < 20))) : : +- Join Inner, (((sequence#660817 = sequence#660591) && (exper_set#660827 = exper_set#660592)) && (exper#660828 = exper#660593)) : : :- SubqueryAlias `raw_output_dta` : : : +- Relation[index#660813,ipi#660814,description#660815,symbol#660816,sequence#660817,mass#660818,charge#660819,segment#660820,IR#660821,INT#660822,NP#660823,R2#660824,entry#660825,link#660826,exper_set#660827,exper#660828] parquet : : +- SubqueryAlias `spectral_data` : : +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L] : : +- SubqueryAlias `clean` : : +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593] : : +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$ : : +- SubqueryAlias `raw_spectral_dta` : : +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet : +- SubqueryAlias `group_names_separated` : +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv +- SubqueryAlias `all_rows_bigger_than_four` +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752], [max(R#660811) AS max_R#661746, ipi#660546 AS ipi#661747, description#660547 AS description#661748, symbol#660548 AS symbol#661749, residue#661752] +- SubqueryAlias `group_averages` +- Filter (num_datasets#660810L > cast(1 as bigint)) +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752, exper_set#660559, group_description#660716, total_residues_detected#660809L], [ipi#660546, description#660547, symbol#660548, residue#661752, group_description#660716, total_residues_detected#660809L, count(R#660758) AS num_datasets#660810L, CASE WHEN (stddev_samp(R#660758) < (cast(0.6 as double) * avg(R#660758))) THEN avg(R#660758) ELSE CASE WHEN (min(R#660758) < cast(4 as double)) THEN min(R#660758) ELSE avg(R#660758) END END AS R#660811] +- Project [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#661752, group_description#660716, R#660758, total_residues_detected#660809L] +- Join Inner, (((ipi#660546 = ipi#660814) && (description#660547 = description#660815)) && (symbol#660548 = symbol#660816)) :- SubqueryAlias `table_by_residue` : +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752, exper_set#660559, exper#660560, group_description#660716], [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#661752, group_description#660716, CASE WHEN (stddev_samp(R#661751) < (cast(0.6 as double) * avg(R#661751))) THEN avg(R#661751) ELSE CASE WHEN (min(R#661751) < cast(4 as double)) THEN min(R#661751) ELSE avg(R#661751) END END AS R#660758] : +- Join Inner, (exper#660560 = Cimage link#660715) : :- SubqueryAlias `table_by_peptide` : : +- Project [ipi#660546, symbol#660548, description#660547, sequence#660549, R#661751, exper#660560, exper_set#660559, residue#661752] : : +- Sort [ipi#660546 ASC NULLS FIRST], true : : +- Aggregate [exper#660560, ipi#660546, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3), symbol#660548, exper_set#660559, sp#660696, sequence#660549, charge#660551, description#660547], [ipi#660546, symbol#660548, description#660547, sequence#660549, avg(cast(IR#660553 as double)) AS R#661751, exper#660560, exper_set#660559, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3) AS residue#661752] : : +- Filter (((exper_set#660559 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660549 LIKE %C*% && (cast(R2#660556 as double) > cast(0.8 as double)))) : : +- Join Inner, (ipi#660546 = ipi#660697) : : :- SubqueryAlias `uniprot_sequences` : : : +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv : : +- SubqueryAlias `joined_spectral` : : +- Project [ipi#660546, description#660547, symbol#660548, sequence#660549, mass#660550, charge#660551, segment#660552, IR#660553, INT#660554, NP#660555, R2#660556, entry#660557, exper#660560, exper_set#660559] : : +- Filter (NOT ipi#660546 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660553 as int) < 20))) : : +- Join Inner, (((sequence#660549 = sequence#660591) && (exper_set#660559 = exper_set#660592)) && (exper#660560 = exper#660593)) : : :- SubqueryAlias `raw_output_dta` : : : +- Relation[index#660545,ipi#660546,description#660547,symbol#660548,sequence#660549,mass#660550,charge#660551,segment#660552,IR#660553,INT#660554,NP#660555,R2#660556,entry#660557,link#660558,exper_set#660559,exper#660560] parquet : : +- SubqueryAlias `spectral_data` : : +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L] : : +- SubqueryAlias `clean` : : +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593] : : +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$ : : +- SubqueryAlias `raw_spectral_dta` : : +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet : +- SubqueryAlias `group_names_separated` : +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv +- SubqueryAlias `occurances_table` +- Aggregate [ipi#660814, description#660815, symbol#660816], [count(distinct residue#661752) AS total_residues_detected#660809L, ipi#660814, description#660815, symbol#660816] +- SubqueryAlias `table_by_residue` +- Aggregate [ipi#660814, description#660815, symbol#660816, residue#661752, exper_set#660827, exper#660828, group_description#660716], [ipi#660814, description#660815, symbol#660816, exper_set#660827, exper#660828, residue#661752, group_description#660716, CASE WHEN (stddev_samp(R#661751) < (cast(0.6 as double) * avg(R#661751))) THEN avg(R#661751) ELSE CASE WHEN (min(R#661751) < cast(4 as double)) THEN min(R#661751) ELSE avg(R#661751) END END AS R#660758] +- Join Inner, (exper#660828 = Cimage link#660715) :- SubqueryAlias `table_by_peptide` : +- !Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#661751, exper#660828, exper_set#660827, residue#661752] : +- Sort [ipi#660814 ASC NULLS FIRST], true : +- Aggregate [exper#660828, ipi#660814, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3), symbol#660816, exper_set#660827, sp#660696, sequence#660817, charge#660819, description#660815], [ipi#660814, symbol#660816, description#660815, sequence#660817, avg(cast(IR#660821 as double)) AS R#660730, exper#660828, exper_set#660827, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3) AS residue#660731] : +- Filter (((exper_set#660827 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660817 LIKE %C*% && (cast(R2#660824 as double) > cast(0.8 as double)))) : +- Join Inner, (ipi#660814 = ipi#660697) : :- SubqueryAlias `uniprot_sequences` : : +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv : +- SubqueryAlias `joined_spectral` : +- Project [ipi#660814, description#660815, symbol#660816, sequence#660817, mass#660818, charge#660819, segment#660820, IR#660821, INT#660822, NP#660823, R2#660824, entry#660825, exper#660828, exper_set#660827] : +- Filter (NOT ipi#660814 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660821 as int) < 20))) : +- Join Inner, (((sequence#660817 = sequence#660591) && (exper_set#660827 = exper_set#660592)) && (exper#660828 = exper#660593)) : :- SubqueryAlias `raw_output_dta` : : +- Relation[index#660813,ipi#660814,description#660815,symbol#660816,sequence#660817,mass#660818,charge#660819,segment#660820,IR#660821,INT#660822,NP#660823,R2#660824,entry#660825,link#660826,exper_set#660827,exper#660828] parquet : +- SubqueryAlias `spectral_data` : +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L] : +- SubqueryAlias `clean` : +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593] : +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$ : +- SubqueryAlias `raw_spectral_dta` : +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet +- SubqueryAlias `group_names_separated` +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:42) at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:95) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:326) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:85) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:126) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:126) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:85) at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:95) at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:108) at org.apache.spark.sql.catalyst.analysis.Analyzer$$anonfun$executeAndCheck$1.apply(Analyzer.scala:105) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201) at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:105) at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57) at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55) at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:78) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) at sun.reflect.GeneratedMethodAccessor90.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:238) at java.lang.Thread.run(Thread.java:748) During handling of the above exception, another exception occurred: AnalysisException Traceback (most recent call last) <ipython-input-295-d3f078949c8c> in <module> 9 ) AS all_rows_bigger_than_four 10 WHERE all_rows_bigger_than_four.max_R >= 4.0 ---> 11 """) 12 filtered_group_averages.registerTempTable("filtered_group_averages") 13 sql.sql("""SELECT * FROM filtered_group_averages /usr/local/spark/python/pyspark/sql/context.py in sql(self, sqlQuery) 356 [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')] 357 """ --> 358 return self.sparkSession.sql(sqlQuery) 359 360 @since(1.0) /usr/local/spark/python/pyspark/sql/session.py in sql(self, sqlQuery) 765 [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')] 766 """ --> 767 return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped) 768 769 @since(2.0) /usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args) 1255 answer = self.gateway_client.send_command(command) 1256 return_value = get_return_value( -> 1257 answer, self.gateway_client, self.target_id, self.name) 1258 1259 for temp_arg in temp_args: /usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw) 67 e.java_exception.getStackTrace())) 68 if s.startswith('org.apache.spark.sql.AnalysisException: '): ---> 69 raise AnalysisException(s.split(': ', 1)[1], stackTrace) 70 if s.startswith('org.apache.spark.sql.catalyst.analysis'): 71 raise AnalysisException(s.split(': ', 1)[1], stackTrace) AnalysisException: 'Resolved attribute(s) R#661751,residue#661752 missing from ipi#660814,residue#660731,exper_set#660827,R#660730,description#660815,sequence#660817,exper#660828,symbol#660816 in operator !Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#661751, exper#660828, exper_set#660827, residue#661752]. Attribute(s) with the same name appear in the operation: R,residue. Please check if the right attribute(s) are used.;;\nProject [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, num_datasets#660810L, R#660811]\n+- Filter (max_R#661746 >= cast(4.0 as double))\n +- Project [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, num_datasets#660810L, R#660811, max_R#661746]\n +- Join Inner, ((((ipi#660546 = ipi#661747) && (description#660547 = description#661748)) && (symbol#660548 = symbol#661749)) && (residue#660731 = residue#661752))\n :- SubqueryAlias `group_averages`\n : +- Filter (num_datasets#660810L > cast(1 as bigint))\n : +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#660731, exper_set#660559, group_description#660716, total_residues_detected#660809L], [ipi#660546, description#660547, symbol#660548, residue#660731, group_description#660716, total_residues_detected#660809L, count(R#660758) AS num_datasets#660810L, CASE WHEN (stddev_samp(R#660758) < (cast(0.6 as double) * avg(R#660758))) THEN avg(R#660758) ELSE CASE WHEN (min(R#660758) < cast(4 as double)) THEN min(R#660758) ELSE avg(R#660758) END END AS R#660811]\n : +- Project [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#660731, group_description#660716, R#660758, total_residues_detected#660809L]\n : +- Join Inner, (((ipi#660546 = ipi#660814) && (description#660547 = description#660815)) && (symbol#660548 = symbol#660816))\n : :- SubqueryAlias `table_by_residue`\n : : +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#660731, exper_set#660559, exper#660560, group_description#660716], [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#660731, group_description#660716, CASE WHEN (stddev_samp(R#660730) < (cast(0.6 as double) * avg(R#660730))) THEN avg(R#660730) ELSE CASE WHEN (min(R#660730) < cast(4 as double)) THEN min(R#660730) ELSE avg(R#660730) END END AS R#660758]\n : : +- Join Inner, (exper#660560 = Cimage link#660715)\n : : :- SubqueryAlias `table_by_peptide`\n : : : +- Project [ipi#660546, symbol#660548, description#660547, sequence#660549, R#660730, exper#660560, exper_set#660559, residue#660731]\n : : : +- Sort [ipi#660546 ASC NULLS FIRST], true\n : : : +- Aggregate [exper#660560, ipi#660546, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3), symbol#660548, exper_set#660559, sp#660696, sequence#660549, charge#660551, description#660547], [ipi#660546, symbol#660548, description#660547, sequence#660549, avg(cast(IR#660553 as double)) AS R#660730, exper#660560, exper_set#660559, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3) AS residue#660731]\n : : : +- Filter (((exper_set#660559 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660549 LIKE %C*% && (cast(R2#660556 as double) > cast(0.8 as double))))\n : : : +- Join Inner, (ipi#660546 = ipi#660697)\n : : : :- SubqueryAlias `uniprot_sequences`\n : : : : +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv\n : : : +- SubqueryAlias `joined_spectral`\n : : : +- Project [ipi#660546, description#660547, symbol#660548, sequence#660549, mass#660550, charge#660551, segment#660552, IR#660553, INT#660554, NP#660555, R2#660556, entry#660557, exper#660560, exper_set#660559]\n : : : +- Filter (NOT ipi#660546 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660553 as int) < 20)))\n : : : +- Join Inner, (((sequence#660549 = sequence#660591) && (exper_set#660559 = exper_set#660592)) && (exper#660560 = exper#660593))\n : : : :- SubqueryAlias `raw_output_dta`\n : : : : +- Relation[index#660545,ipi#660546,description#660547,symbol#660548,sequence#660549,mass#660550,charge#660551,segment#660552,IR#660553,INT#660554,NP#660555,R2#660556,entry#660557,link#660558,exper_set#660559,exper#660560] parquet\n : : : +- SubqueryAlias `spectral_data`\n : : : +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]\n : : : +- SubqueryAlias `clean`\n : : : +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]\n : : : +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$\n : : : +- SubqueryAlias `raw_spectral_dta`\n : : : +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet\n : : +- SubqueryAlias `group_names_separated`\n : : +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv\n : +- SubqueryAlias `occurances_table`\n : +- Aggregate [ipi#660814, description#660815, symbol#660816], [count(distinct residue#660731) AS total_residues_detected#660809L, ipi#660814, description#660815, symbol#660816]\n : +- SubqueryAlias `table_by_residue`\n : +- Aggregate [ipi#660814, description#660815, symbol#660816, residue#660731, exper_set#660827, exper#660828, group_description#660716], [ipi#660814, description#660815, symbol#660816, exper_set#660827, exper#660828, residue#660731, group_description#660716, CASE WHEN (stddev_samp(R#660730) < (cast(0.6 as double) * avg(R#660730))) THEN avg(R#660730) ELSE CASE WHEN (min(R#660730) < cast(4 as double)) THEN min(R#660730) ELSE avg(R#660730) END END AS R#660758]\n : +- Join Inner, (exper#660828 = Cimage link#660715)\n : :- SubqueryAlias `table_by_peptide`\n : : +- Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#660730, exper#660828, exper_set#660827, residue#660731]\n : : +- Sort [ipi#660814 ASC NULLS FIRST], true\n : : +- Aggregate [exper#660828, ipi#660814, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3), symbol#660816, exper_set#660827, sp#660696, sequence#660817, charge#660819, description#660815], [ipi#660814, symbol#660816, description#660815, sequence#660817, avg(cast(IR#660821 as double)) AS R#660730, exper#660828, exper_set#660827, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3) AS residue#660731]\n : : +- Filter (((exper_set#660827 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660817 LIKE %C*% && (cast(R2#660824 as double) > cast(0.8 as double))))\n : : +- Join Inner, (ipi#660814 = ipi#660697)\n : : :- SubqueryAlias `uniprot_sequences`\n : : : +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv\n : : +- SubqueryAlias `joined_spectral`\n : : +- Project [ipi#660814, description#660815, symbol#660816, sequence#660817, mass#660818, charge#660819, segment#660820, IR#660821, INT#660822, NP#660823, R2#660824, entry#660825, exper#660828, exper_set#660827]\n : : +- Filter (NOT ipi#660814 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660821 as int) < 20)))\n : : +- Join Inner, (((sequence#660817 = sequence#660591) && (exper_set#660827 = exper_set#660592)) && (exper#660828 = exper#660593))\n : : :- SubqueryAlias `raw_output_dta`\n : : : +- Relation[index#660813,ipi#660814,description#660815,symbol#660816,sequence#660817,mass#660818,charge#660819,segment#660820,IR#660821,INT#660822,NP#660823,R2#660824,entry#660825,link#660826,exper_set#660827,exper#660828] parquet\n : : +- SubqueryAlias `spectral_data`\n : : +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]\n : : +- SubqueryAlias `clean`\n : : +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]\n : : +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$\n : : +- SubqueryAlias `raw_spectral_dta`\n : : +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet\n : +- SubqueryAlias `group_names_separated`\n : +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv\n +- SubqueryAlias `all_rows_bigger_than_four`\n +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752], [max(R#660811) AS max_R#661746, ipi#660546 AS ipi#661747, description#660547 AS description#661748, symbol#660548 AS symbol#661749, residue#661752]\n +- SubqueryAlias `group_averages`\n +- Filter (num_datasets#660810L > cast(1 as bigint))\n +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752, exper_set#660559, group_description#660716, total_residues_detected#660809L], [ipi#660546, description#660547, symbol#660548, residue#661752, group_description#660716, total_residues_detected#660809L, count(R#660758) AS num_datasets#660810L, CASE WHEN (stddev_samp(R#660758) < (cast(0.6 as double) * avg(R#660758))) THEN avg(R#660758) ELSE CASE WHEN (min(R#660758) < cast(4 as double)) THEN min(R#660758) ELSE avg(R#660758) END END AS R#660811]\n +- Project [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#661752, group_description#660716, R#660758, total_residues_detected#660809L]\n +- Join Inner, (((ipi#660546 = ipi#660814) && (description#660547 = description#660815)) && (symbol#660548 = symbol#660816))\n :- SubqueryAlias `table_by_residue`\n : +- Aggregate [ipi#660546, description#660547, symbol#660548, residue#661752, exper_set#660559, exper#660560, group_description#660716], [ipi#660546, description#660547, symbol#660548, exper_set#660559, exper#660560, residue#661752, group_description#660716, CASE WHEN (stddev_samp(R#661751) < (cast(0.6 as double) * avg(R#661751))) THEN avg(R#661751) ELSE CASE WHEN (min(R#661751) < cast(4 as double)) THEN min(R#661751) ELSE avg(R#661751) END END AS R#660758]\n : +- Join Inner, (exper#660560 = Cimage link#660715)\n : :- SubqueryAlias `table_by_peptide`\n : : +- Project [ipi#660546, symbol#660548, description#660547, sequence#660549, R#661751, exper#660560, exper_set#660559, residue#661752]\n : : +- Sort [ipi#660546 ASC NULLS FIRST], true\n : : +- Aggregate [exper#660560, ipi#660546, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3), symbol#660548, exper_set#660559, sp#660696, sequence#660549, charge#660551, description#660547], [ipi#660546, symbol#660548, description#660547, sequence#660549, avg(cast(IR#660553 as double)) AS R#661751, exper#660560, exper_set#660559, ((instr(protein_sequence#660699, regexp_replace(sequence#660549, [.*-], )) + instr(sequence#660549, *)) - 3) AS residue#661752]\n : : +- Filter (((exper_set#660559 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660549 LIKE %C*% && (cast(R2#660556 as double) > cast(0.8 as double))))\n : : +- Join Inner, (ipi#660546 = ipi#660697)\n : : :- SubqueryAlias `uniprot_sequences`\n : : : +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv\n : : +- SubqueryAlias `joined_spectral`\n : : +- Project [ipi#660546, description#660547, symbol#660548, sequence#660549, mass#660550, charge#660551, segment#660552, IR#660553, INT#660554, NP#660555, R2#660556, entry#660557, exper#660560, exper_set#660559]\n : : +- Filter (NOT ipi#660546 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660553 as int) < 20)))\n : : +- Join Inner, (((sequence#660549 = sequence#660591) && (exper_set#660559 = exper_set#660592)) && (exper#660560 = exper#660593))\n : : :- SubqueryAlias `raw_output_dta`\n : : : +- Relation[index#660545,ipi#660546,description#660547,symbol#660548,sequence#660549,mass#660550,charge#660551,segment#660552,IR#660553,INT#660554,NP#660555,R2#660556,entry#660557,link#660558,exper_set#660559,exper#660560] parquet\n : : +- SubqueryAlias `spectral_data`\n : : +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]\n : : +- SubqueryAlias `clean`\n : : +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]\n : : +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$\n : : +- SubqueryAlias `raw_spectral_dta`\n : : +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet\n : +- SubqueryAlias `group_names_separated`\n : +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv\n +- SubqueryAlias `occurances_table`\n +- Aggregate [ipi#660814, description#660815, symbol#660816], [count(distinct residue#661752) AS total_residues_detected#660809L, ipi#660814, description#660815, symbol#660816]\n +- SubqueryAlias `table_by_residue`\n +- Aggregate [ipi#660814, description#660815, symbol#660816, residue#661752, exper_set#660827, exper#660828, group_description#660716], [ipi#660814, description#660815, symbol#660816, exper_set#660827, exper#660828, residue#661752, group_description#660716, CASE WHEN (stddev_samp(R#661751) < (cast(0.6 as double) * avg(R#661751))) THEN avg(R#661751) ELSE CASE WHEN (min(R#661751) < cast(4 as double)) THEN min(R#661751) ELSE avg(R#661751) END END AS R#660758]\n +- Join Inner, (exper#660828 = Cimage link#660715)\n :- SubqueryAlias `table_by_peptide`\n : +- !Project [ipi#660814, symbol#660816, description#660815, sequence#660817, R#661751, exper#660828, exper_set#660827, residue#661752]\n : +- Sort [ipi#660814 ASC NULLS FIRST], true\n : +- Aggregate [exper#660828, ipi#660814, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3), symbol#660816, exper_set#660827, sp#660696, sequence#660817, charge#660819, description#660815], [ipi#660814, symbol#660816, description#660815, sequence#660817, avg(cast(IR#660821 as double)) AS R#660730, exper#660828, exper_set#660827, ((instr(protein_sequence#660699, regexp_replace(sequence#660817, [.*-], )) + instr(sequence#660817, *)) - 3) AS residue#660731]\n : +- Filter (((exper_set#660827 = Metabolites) && NOT sp#660696 LIKE Reverse%) && (sequence#660817 LIKE %C*% && (cast(R2#660824 as double) > cast(0.8 as double))))\n : +- Join Inner, (ipi#660814 = ipi#660697)\n : :- SubqueryAlias `uniprot_sequences`\n : : +- Relation[sp#660696,ipi#660697,origin_protein_description#660698,protein_sequence#660699] csv\n : +- SubqueryAlias `joined_spectral`\n : +- Project [ipi#660814, description#660815, symbol#660816, sequence#660817, mass#660818, charge#660819, segment#660820, IR#660821, INT#660822, NP#660823, R2#660824, entry#660825, exper#660828, exper_set#660827]\n : +- Filter (NOT ipi#660814 LIKE %Reverse% && ((spectral_counts#660646L > cast(1 as bigint)) || (cast(IR#660821 as int) < 20)))\n : +- Join Inner, (((sequence#660817 = sequence#660591) && (exper_set#660827 = exper_set#660592)) && (exper#660828 = exper#660593))\n : :- SubqueryAlias `raw_output_dta`\n : : +- Relation[index#660813,ipi#660814,description#660815,symbol#660816,sequence#660817,mass#660818,charge#660819,segment#660820,IR#660821,INT#660822,NP#660823,R2#660824,entry#660825,link#660826,exper_set#660827,exper#660828] parquet\n : +- SubqueryAlias `spectral_data`\n : +- Aggregate [exper_set#660592, exper#660593, Sequence#660591], [Sequence#660591, exper#660593, exper_set#660592, count(Sequence#660591) AS spectral_counts#660646L]\n : +- SubqueryAlias `clean`\n : +- Project [locus#660577, sequence_count#660578, spectrum_count#660579, sequence_coverage#660580, length#660581, molwt#660582, pi#660583, validation_status#660584, nsaf#660585, empai#660586, descriptive_name#660587, hredundancy#660588, lredundancy#660589, mredundancy#660590, Sequence#660591, exper_set#660592, exper#660593]\n : +- Filter Sequence#660591 RLIKE ^[KR-].[^.]+(?:[KR]..|..-)$\n : +- SubqueryAlias `raw_spectral_dta`\n : +- Relation[locus#660577,sequence_count#660578,spectrum_count#660579,sequence_coverage#660580,length#660581,molwt#660582,pi#660583,validation_status#660584,nsaf#660585,empai#660586,descriptive_name#660587,hredundancy#660588,lredundancy#660589,mredundancy#660590,Sequence#660591,exper_set#660592,exper#660593] parquet\n +- SubqueryAlias `group_names_separated`\n +- Relation[Group ##660714,Cimage link#660715,group_description#660716,cell_type#660717] csv\n'
while,
SELECT group_averages.* FROM group_averages NATURAL INNER JOIN ( SELECT MAX(R) AS max_R, ipi AS ipi, description AS description, symbol AS symbol, residue AS residue FROM group_averages GROUP BY ipi, description, symbol, residue ) AS all_rows_bigger_than_four WHERE all_rows_bigger_than_four.max_R >= 4.0
works